Skip to content

Commit

Permalink
Merge pull request #108 from constellation-rs/miri
Browse files Browse the repository at this point in the history
Add miri to CI
  • Loading branch information
alecmocatta authored Aug 17, 2020
2 parents 8ad3cc8 + 4a4c9f3 commit 4c2d2ec
Show file tree
Hide file tree
Showing 47 changed files with 253 additions and 149 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ async-channel = "1.1"
bincode = { version = "1.3", optional = true }
constellation-rs = { version = "0.2.0-alpha.2", default-features = false, optional = true }
derive-new = "0.5"
event-listener = "=2.3.1" # https://github.com/stjepang/event-listener/issues/9
event-listener = "2.3.3"
futures = "0.3"
num_cpus = "1.13"
pin-project = "0.4"
Expand Down
2 changes: 1 addition & 1 deletion amadeus-parquet/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ educe = "0.4"
flate2 = { version = "1.0.2", features = ["rust_backend"], default-features = false }
futures = "0.3"
fxhash = "0.2"
linked-hash-map = "0.5"
hashlink = "0.5"
lz-fear = "0.1"
num-bigint = "0.3"
quick-error = "1.2.2"
Expand Down
7 changes: 7 additions & 0 deletions amadeus-parquet/src/internal/compression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -290,26 +290,31 @@ mod tests {
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_codec_snappy() {
test_codec(CodecType::Snappy);
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_codec_gzip() {
test_codec(CodecType::Gzip);
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_codec_brotli() {
test_codec(CodecType::Brotli);
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_codec_lz4() {
test_codec(CodecType::Lz4);
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_codec_zstd() {
test_codec(CodecType::Zstd);
}
Expand Down Expand Up @@ -350,6 +355,7 @@ mod tests {
macro_rules! compress {
($fname:ident, $codec:expr, $col_idx:expr) => {
#[bench]
#[cfg_attr(miri, ignore)]
fn $fname(bench: &mut Bencher) {
let mut codec = create_codec($codec).unwrap().unwrap();
let data = get_pages_bytes($col_idx);
Expand All @@ -366,6 +372,7 @@ mod tests {
macro_rules! decompress {
($fname:ident, $codec:expr, $col_idx:expr) => {
#[bench]
#[cfg_attr(miri, ignore)]
fn $fname(bench: &mut Bencher) {
let compressed_pages = {
let mut codec = create_codec($codec).unwrap().unwrap();
Expand Down
5 changes: 4 additions & 1 deletion amadeus-parquet/src/internal/encodings/decoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ impl<T: DataType> Decoder<T> for PlainDecoder<T> {
return Err(eof_err!("Not enough bytes to decode"));
}
let raw_buffer: &mut [u8] =
unsafe { from_raw_parts_mut(buffer.as_ptr() as *mut u8, bytes_to_decode) };
unsafe { from_raw_parts_mut(buffer.as_mut_ptr() as *mut u8, bytes_to_decode) };
raw_buffer.copy_from_slice(data.range(self.start, bytes_to_decode).as_ref());
self.start += bytes_to_decode;
self.num_values -= num_values;
Expand Down Expand Up @@ -1489,6 +1489,7 @@ mod tests {
macro_rules! plain {
($fname:ident, $num_values:expr, $batch_size:expr, $ty:ident, $pty:expr, $gen_data_fn:expr) => {
#[bench]
#[cfg_attr(miri, ignore)]
fn $fname(bench: &mut Bencher) {
let mem_tracker = Rc::new(MemTracker::new());
let mut encoder =
Expand All @@ -1508,6 +1509,7 @@ mod tests {
($fname:ident, $num_values:expr, $batch_size:expr, $ty:ident, $pty:expr,
$gen_data_fn:expr) => {
#[bench]
#[cfg_attr(miri, ignore)]
fn $fname(bench: &mut Bencher) {
let mem_tracker = Rc::new(MemTracker::new());
let mut encoder = DictEncoder::<$ty>::new(Rc::new(col_desc(0, $pty)), mem_tracker);
Expand Down Expand Up @@ -1536,6 +1538,7 @@ mod tests {
macro_rules! delta_bit_pack {
($fname:ident, $num_values:expr, $batch_size:expr, $ty:ident, $gen_data_fn:expr) => {
#[bench]
#[cfg_attr(miri, ignore)]
fn $fname(bench: &mut Bencher) {
let mut encoder = DeltaBitPackEncoder::<$ty>::new();

Expand Down
58 changes: 34 additions & 24 deletions amadeus-parquet/src/internal/encodings/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -631,12 +631,7 @@ impl<T: DataType> DeltaBitPackEncoder<T> {
// Write min delta
self.bit_writer.put_zigzag_vlq_int(min_delta)?;

// Slice to store bit width for each mini block
// apply unsafe allocation to avoid double mutable borrow
let mini_block_widths: &mut [u8] = unsafe {
let tmp_slice = self.bit_writer.get_next_byte_ptr(self.num_mini_blocks)?;
slice::from_raw_parts_mut(tmp_slice.as_ptr() as *mut u8, self.num_mini_blocks)
};
let offset = self.bit_writer.skip(self.num_mini_blocks)?;

for i in 0..self.num_mini_blocks {
// Find how many values we need to encode - either block size or whatever
Expand All @@ -653,13 +648,13 @@ impl<T: DataType> DeltaBitPackEncoder<T> {
}

// Compute bit width to store (max_delta - min_delta)
let bit_width = num_required_bits(self.subtract_u64(max_delta, min_delta));
mini_block_widths[i] = bit_width as u8;
let bit_width = num_required_bits(Self::subtract_u64(max_delta, min_delta));
self.bit_writer.buffer_set(offset + i, bit_width as u8);

// Encode values in current mini block using min_delta and bit_width
for j in 0..n {
let packed_value =
self.subtract_u64(self.deltas[i * self.mini_block_size + j], min_delta);
Self::subtract_u64(self.deltas[i * self.mini_block_size + j], min_delta);
self.bit_writer.put_value(packed_value, bit_width)?;
}

Expand Down Expand Up @@ -691,7 +686,7 @@ impl<T: DataType> Encoder<T> for DeltaBitPackEncoder<T> {
let mut idx;
// Define values to encode, initialize state
if self.total_values == 0 {
self.first_value = self.as_i64(values, 0);
self.first_value = Self::as_i64(values, 0);
self.current_value = self.first_value;
idx = 1;
} else {
Expand All @@ -702,8 +697,8 @@ impl<T: DataType> Encoder<T> for DeltaBitPackEncoder<T> {

// Write block
while idx < values.len() {
let value = self.as_i64(values, idx);
self.deltas[self.values_in_block] = self.subtract(value, self.current_value);
let value = Self::as_i64(values, idx);
self.deltas[self.values_in_block] = Self::subtract(value, self.current_value);
self.current_value = value;
idx += 1;
self.values_in_block += 1;
Expand Down Expand Up @@ -750,11 +745,11 @@ trait DeltaBitPackEncoderConversion<T: DataType> {
// Method should panic if type is not supported, otherwise no-op
fn assert_supported_type();

fn as_i64(&self, values: &[T::Type], index: usize) -> i64;
fn as_i64(values: &[T::Type], index: usize) -> i64;

fn subtract(&self, left: i64, right: i64) -> i64;
fn subtract(left: i64, right: i64) -> i64;

fn subtract_u64(&self, left: i64, right: i64) -> u64;
fn subtract_u64(left: i64, right: i64) -> u64;
}

impl<T: DataType> DeltaBitPackEncoderConversion<T> for DeltaBitPackEncoder<T> {
Expand All @@ -764,17 +759,17 @@ impl<T: DataType> DeltaBitPackEncoderConversion<T> for DeltaBitPackEncoder<T> {
}

#[inline]
default fn as_i64(&self, _values: &[T::Type], _index: usize) -> i64 {
default fn as_i64(_values: &[T::Type], _index: usize) -> i64 {
0
}

#[inline]
default fn subtract(&self, _left: i64, _right: i64) -> i64 {
default fn subtract(_left: i64, _right: i64) -> i64 {
0
}

#[inline]
default fn subtract_u64(&self, _left: i64, _right: i64) -> u64 {
default fn subtract_u64(_left: i64, _right: i64) -> u64 {
0
}
}
Expand All @@ -786,18 +781,18 @@ impl DeltaBitPackEncoderConversion<Int32Type> for DeltaBitPackEncoder<Int32Type>
}

#[inline]
fn as_i64(&self, values: &[i32], index: usize) -> i64 {
fn as_i64(values: &[i32], index: usize) -> i64 {
values[index] as i64
}

#[inline]
fn subtract(&self, left: i64, right: i64) -> i64 {
fn subtract(left: i64, right: i64) -> i64 {
// It is okay for values to overflow; wrapping_sub wraps around at the boundary
(left as i32).wrapping_sub(right as i32) as i64
}

#[inline]
fn subtract_u64(&self, left: i64, right: i64) -> u64 {
fn subtract_u64(left: i64, right: i64) -> u64 {
// Convert i32 -> u32 -> u64 so the value is zero-extended rather than sign-extended,
// which avoids non-zero leftmost bytes in the integer representation
(left as i32).wrapping_sub(right as i32) as u32 as u64
Expand All @@ -811,18 +806,18 @@ impl DeltaBitPackEncoderConversion<Int64Type> for DeltaBitPackEncoder<Int64Type>
}

#[inline]
fn as_i64(&self, values: &[i64], index: usize) -> i64 {
fn as_i64(values: &[i64], index: usize) -> i64 {
values[index]
}

#[inline]
fn subtract(&self, left: i64, right: i64) -> i64 {
fn subtract(left: i64, right: i64) -> i64 {
// It is okay for values to overflow; wrapping_sub wraps around at the boundary
left.wrapping_sub(right)
}

#[inline]
fn subtract_u64(&self, left: i64, right: i64) -> u64 {
fn subtract_u64(left: i64, right: i64) -> u64 {
left.wrapping_sub(right) as u64
}
}
Expand Down Expand Up @@ -1013,6 +1008,7 @@ mod tests {
const TEST_SET_SIZE: usize = 1024;

#[test]
#[cfg_attr(miri, ignore)]
fn test_get_encoders() {
// supported encodings
create_and_check_encoder::<Int32Type>(Encoding::Plain, None);
Expand Down Expand Up @@ -1043,45 +1039,52 @@ mod tests {
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_bool() {
BoolType::test(Encoding::Plain, TEST_SET_SIZE, -1);
BoolType::test(Encoding::PlainDictionary, TEST_SET_SIZE, -1);
BoolType::test(Encoding::Rle, TEST_SET_SIZE, -1);
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_i32() {
Int32Type::test(Encoding::Plain, TEST_SET_SIZE, -1);
Int32Type::test(Encoding::PlainDictionary, TEST_SET_SIZE, -1);
Int32Type::test(Encoding::DeltaBinaryPacked, TEST_SET_SIZE, -1);
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_i64() {
Int64Type::test(Encoding::Plain, TEST_SET_SIZE, -1);
Int64Type::test(Encoding::PlainDictionary, TEST_SET_SIZE, -1);
Int64Type::test(Encoding::DeltaBinaryPacked, TEST_SET_SIZE, -1);
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_i96() {
Int96Type::test(Encoding::Plain, TEST_SET_SIZE, -1);
Int96Type::test(Encoding::PlainDictionary, TEST_SET_SIZE, -1);
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_float() {
FloatType::test(Encoding::Plain, TEST_SET_SIZE, -1);
FloatType::test(Encoding::PlainDictionary, TEST_SET_SIZE, -1);
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_double() {
DoubleType::test(Encoding::Plain, TEST_SET_SIZE, -1);
DoubleType::test(Encoding::PlainDictionary, TEST_SET_SIZE, -1);
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_byte_array() {
ByteArrayType::test(Encoding::Plain, TEST_SET_SIZE, -1);
ByteArrayType::test(Encoding::PlainDictionary, TEST_SET_SIZE, -1);
Expand All @@ -1090,13 +1093,15 @@ mod tests {
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_fixed_lenbyte_array() {
FixedLenByteArrayType::test(Encoding::Plain, TEST_SET_SIZE, 100);
FixedLenByteArrayType::test(Encoding::PlainDictionary, TEST_SET_SIZE, 100);
FixedLenByteArrayType::test(Encoding::DeltaByteArray, TEST_SET_SIZE, 100);
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_dict_encoded_size() {
fn run_test<T: DataType>(type_length: i32, values: &[T::Type], expected_size: usize) {
let mut encoder = create_test_dict_encoder::<T>(type_length);
Expand All @@ -1121,6 +1126,7 @@ mod tests {
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_estimated_data_encoded_size() {
fn run_test<T: DataType>(
encoding: Encoding, type_length: i32, values: &[T::Type], initial_size: usize,
Expand Down Expand Up @@ -1181,6 +1187,7 @@ mod tests {

// See: https://github.com/sunchao/parquet-rs/issues/47
#[test]
#[cfg_attr(miri, ignore)]
fn test_issue_47() {
let mut encoder = create_test_encoder::<ByteArrayType>(0, Encoding::DeltaByteArray);
let mut decoder = create_test_decoder::<ByteArrayType>(0, Encoding::DeltaByteArray);
Expand Down Expand Up @@ -1383,6 +1390,7 @@ mod tests {
macro_rules! plain {
($fname:ident, $batch_size:expr, $ty:ident, $pty:expr, $gen_data_fn:expr) => {
#[bench]
#[cfg_attr(miri, ignore)]
fn $fname(bench: &mut Bencher) {
let mem_tracker = Rc::new(MemTracker::new());
let encoder =
Expand All @@ -1396,6 +1404,7 @@ mod tests {
macro_rules! dict {
($fname:ident, $batch_size:expr, $ty:ident, $pty:expr, $gen_data_fn:expr) => {
#[bench]
#[cfg_attr(miri, ignore)]
fn $fname(bench: &mut Bencher) {
let mem_tracker = Rc::new(MemTracker::new());
let encoder = DictEncoder::<$ty>::new(Rc::new(col_desc(0, $pty)), mem_tracker);
Expand All @@ -1408,6 +1417,7 @@ mod tests {
macro_rules! delta_bit_pack {
($fname:ident, $batch_size:expr, $ty:ident, $gen_data_fn:expr) => {
#[bench]
#[cfg_attr(miri, ignore)]
fn $fname(bench: &mut Bencher) {
let encoder = DeltaBitPackEncoder::<$ty>::new();
let (bytes, values) = $gen_data_fn($batch_size);
Expand Down
5 changes: 3 additions & 2 deletions amadeus-parquet/src/internal/encodings/rle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,7 @@ mod tests {
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_values() {
for width in 1..MAX_WIDTH + 1 {
test_rle_values(width, 1, -1);
Expand Down Expand Up @@ -801,8 +802,8 @@ mod tests {
#[test]
fn test_random() {
let seed_len = 32;
let niters = 50;
let ngroups = 1000;
let niters = if !cfg!(miri) { 50 } else { 1 };
let ngroups = if !cfg!(miri) { 1000 } else { 200 };
let max_group_size = 15;
let mut values = vec![];

Expand Down
Loading

0 comments on commit 4c2d2ec

Please sign in to comment.