Skip to content

Commit

Permalink
Skip unnecessary null checks in MutableArrayData (#4333)
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold authored Jun 1, 2023
1 parent b78d99d commit f261051
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 199 deletions.
32 changes: 6 additions & 26 deletions arrow-data/src/transform/fixed_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,32 +26,12 @@ pub(super) fn build_extend(array: &ArrayData) -> Extend {
};

let values = &array.buffers()[0].as_slice()[array.offset() * size..];
if array.null_count() == 0 {
// fast case where we can copy regions without null issues
Box::new(
move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
let buffer = &mut mutable.buffer1;
buffer.extend_from_slice(&values[start * size..(start + len) * size]);
},
)
} else {
Box::new(
move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
// nulls present: append item by item, ignoring null entries
let values_buffer = &mut mutable.buffer1;

(start..start + len).for_each(|i| {
if array.is_valid(i) {
// append value
let bytes = &values[i * size..(i + 1) * size];
values_buffer.extend_from_slice(bytes);
} else {
values_buffer.extend_zeros(size);
}
})
},
)
}
Box::new(
move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
let buffer = &mut mutable.buffer1;
buffer.extend_from_slice(&values[start * size..(start + len) * size]);
},
)
}

pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) {
Expand Down
40 changes: 8 additions & 32 deletions arrow-data/src/transform/fixed_size_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,38 +26,14 @@ pub(super) fn build_extend(array: &ArrayData) -> Extend {
_ => unreachable!(),
};

if array.null_count() == 0 {
Box::new(
move |mutable: &mut _MutableArrayData,
index: usize,
start: usize,
len: usize| {
mutable.child_data.iter_mut().for_each(|child| {
child.extend(index, start * size, (start + len) * size)
})
},
)
} else {
Box::new(
move |mutable: &mut _MutableArrayData,
index: usize,
start: usize,
len: usize| {
(start..start + len).for_each(|i| {
if array.is_valid(i) {
mutable.child_data.iter_mut().for_each(|child| {
child.extend(index, i * size, (i + 1) * size)
})
} else {
mutable
.child_data
.iter_mut()
.for_each(|child| child.extend_nulls(size))
}
})
},
)
}
Box::new(
move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| {
mutable
.child_data
.iter_mut()
.for_each(|child| child.extend(index, start * size, (start + len) * size))
},
)
}

pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) {
Expand Down
81 changes: 21 additions & 60 deletions arrow-data/src/transform/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,66 +27,27 @@ pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd>(
array: &ArrayData,
) -> Extend {
let offsets = array.buffer::<T>(0);
if array.null_count() == 0 {
// fast case where we can copy regions without nullability checks
Box::new(
move |mutable: &mut _MutableArrayData,
index: usize,
start: usize,
len: usize| {
let offset_buffer = &mut mutable.buffer1;

// this is safe due to how offset is built. See details on `get_last_offset`
let last_offset: T = unsafe { get_last_offset(offset_buffer) };

// offsets
extend_offsets::<T>(
offset_buffer,
last_offset,
&offsets[start..start + len + 1],
);

mutable.child_data[0].extend(
index,
offsets[start].as_usize(),
offsets[start + len].as_usize(),
)
},
)
} else {
// nulls present: append item by item, ignoring null entries
Box::new(
move |mutable: &mut _MutableArrayData,
index: usize,
start: usize,
len: usize| {
let offset_buffer = &mut mutable.buffer1;

// this is safe due to how offset is built. See details on `get_last_offset`
let mut last_offset: T = unsafe { get_last_offset(offset_buffer) };

let delta_len = array.len() - array.null_count();
offset_buffer.reserve(delta_len * std::mem::size_of::<T>());

let child = &mut mutable.child_data[0];
(start..start + len).for_each(|i| {
if array.is_valid(i) {
// compute the new offset
last_offset = last_offset + offsets[i + 1] - offsets[i];

// append value
child.extend(
index,
offsets[i].as_usize(),
offsets[i + 1].as_usize(),
);
}
// append offset
offset_buffer.push(last_offset);
})
},
)
}
Box::new(
move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| {
let offset_buffer = &mut mutable.buffer1;

// this is safe due to how offset is built. See details on `get_last_offset`
let last_offset: T = unsafe { get_last_offset(offset_buffer) };

// offsets
extend_offsets::<T>(
offset_buffer,
last_offset,
&offsets[start..start + len + 1],
);

mutable.child_data[0].extend(
index,
offsets[start].as_usize(),
offsets[start + len].as_usize(),
)
},
)
}

pub(super) fn extend_nulls<T: ArrowNativeType>(
Expand Down
44 changes: 9 additions & 35 deletions arrow-data/src/transform/structure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,41 +18,15 @@
use super::{Extend, _MutableArrayData};
use crate::ArrayData;

pub(super) fn build_extend(array: &ArrayData) -> Extend {
if array.null_count() == 0 {
Box::new(
move |mutable: &mut _MutableArrayData,
index: usize,
start: usize,
len: usize| {
mutable
.child_data
.iter_mut()
.for_each(|child| child.extend(index, start, start + len))
},
)
} else {
Box::new(
move |mutable: &mut _MutableArrayData,
index: usize,
start: usize,
len: usize| {
(start..start + len).for_each(|i| {
if array.is_valid(i) {
mutable
.child_data
.iter_mut()
.for_each(|child| child.extend(index, i, i + 1))
} else {
mutable
.child_data
.iter_mut()
.for_each(|child| child.extend_nulls(1))
}
})
},
)
}
pub(super) fn build_extend(_: &ArrayData) -> Extend {
Box::new(
move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| {
mutable
.child_data
.iter_mut()
.for_each(|child| child.extend(index, start, start + len))
},
)
}

pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) {
Expand Down
61 changes: 15 additions & 46 deletions arrow-data/src/transform/variable_size.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,54 +46,23 @@ pub(super) fn build_extend<
) -> Extend {
let offsets = array.buffer::<T>(0);
let values = array.buffers()[1].as_slice();
if array.null_count() == 0 {
// fast case where we can copy regions without null issues
Box::new(
move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
let offset_buffer = &mut mutable.buffer1;
let values_buffer = &mut mutable.buffer2;
Box::new(
move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
let offset_buffer = &mut mutable.buffer1;
let values_buffer = &mut mutable.buffer2;

// this is safe due to how offset is built. See details on `get_last_offset`
let last_offset = unsafe { get_last_offset(offset_buffer) };
// this is safe due to how offset is built. See details on `get_last_offset`
let last_offset = unsafe { get_last_offset(offset_buffer) };

extend_offsets::<T>(
offset_buffer,
last_offset,
&offsets[start..start + len + 1],
);
// values
extend_offset_values::<T>(values_buffer, offsets, values, start, len);
},
)
} else {
Box::new(
move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
let offset_buffer = &mut mutable.buffer1;
let values_buffer = &mut mutable.buffer2;

// this is safe due to how offset is built. See details on `get_last_offset`
let mut last_offset: T = unsafe { get_last_offset(offset_buffer) };

// nulls present: append item by item, ignoring null entries
offset_buffer.reserve(len * std::mem::size_of::<T>());

(start..start + len).for_each(|i| {
if array.is_valid(i) {
// compute the new offset
let length = offsets[i + 1] - offsets[i];
last_offset = last_offset + length;

// append value
let bytes = &values[offsets[i].to_usize().unwrap()
..offsets[i + 1].to_usize().unwrap()];
values_buffer.extend_from_slice(bytes);
}
// offsets are always present
offset_buffer.push(last_offset);
})
},
)
}
extend_offsets::<T>(
offset_buffer,
last_offset,
&offsets[start..start + len + 1],
);
// values
extend_offset_values::<T>(values_buffer, offsets, values, start, len);
},
)
}

pub(super) fn extend_nulls<T: ArrowNativeType>(
Expand Down

0 comments on commit f261051

Please sign in to comment.