Skip to content

Commit

Permalink
fix: array_resize null fix (apache#13209)
Browse files Browse the repository at this point in the history
* array_resize null fix

* comment

* clippy

* fixes
  • Loading branch information
jonathanc-n authored Nov 2, 2024
1 parent 752561a commit 24d953e
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 5 deletions.
36 changes: 33 additions & 3 deletions datafusion/functions-nested/src/resize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@

use crate::utils::make_scalar_function;
use arrow::array::{Capacities, MutableArrayData};
use arrow_array::{ArrayRef, GenericListArray, Int64Array, OffsetSizeTrait};
use arrow_buffer::{ArrowNativeType, OffsetBuffer};
use arrow_array::{
new_null_array, Array, ArrayRef, GenericListArray, Int64Array, OffsetSizeTrait,
};
use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, OffsetBuffer};
use arrow_schema::DataType::{FixedSizeList, LargeList, List};
use arrow_schema::{DataType, FieldRef};
use datafusion_common::cast::{as_int64_array, as_large_list_array, as_list_array};
Expand Down Expand Up @@ -134,6 +136,23 @@ pub(crate) fn array_resize_inner(arg: &[ArrayRef]) -> Result<ArrayRef> {
return exec_err!("array_resize needs two or three arguments");
}

let array = &arg[0];

// Short-circuit: if every row of the input list array is null, return an all-null array of the same list type
if array.null_count() == array.len() {
let return_type = match array.data_type() {
List(field) => List(Arc::clone(field)),
LargeList(field) => LargeList(Arc::clone(field)),
_ => {
return exec_err!(
"array_resize does not support type '{:?}'.",
array.data_type()
)
}
};
return Ok(new_null_array(&return_type, array.len()));
}

let new_len = as_int64_array(&arg[1])?;
let new_element = if arg.len() == 3 {
Some(Arc::clone(&arg[2]))
Expand Down Expand Up @@ -184,7 +203,16 @@ fn general_list_resize<O: OffsetSizeTrait + TryInto<i64>>(
capacity,
);

let mut null_builder = BooleanBufferBuilder::new(array.len());

for (row_index, offset_window) in array.offsets().windows(2).enumerate() {
if array.is_null(row_index) {
null_builder.append(false);
offsets.push(offsets[row_index]);
continue;
}
null_builder.append(true);

let count = count_array.value(row_index).to_usize().ok_or_else(|| {
internal_datafusion_err!("array_resize: failed to convert size to usize")
})?;
Expand All @@ -211,10 +239,12 @@ fn general_list_resize<O: OffsetSizeTrait + TryInto<i64>>(
}

let data = mutable.freeze();
let null_bit_buffer: NullBuffer = null_builder.finish().into();

Ok(Arc::new(GenericListArray::<O>::try_new(
Arc::clone(field),
OffsetBuffer::<O>::new(offsets.into()),
arrow_array::make_array(data),
None,
Some(null_bit_buffer),
)?))
}
49 changes: 47 additions & 2 deletions datafusion/sqllogictest/test_files/array.slt
Original file line number Diff line number Diff line change
Expand Up @@ -6985,7 +6985,7 @@ select array_resize(column1, column2, column3) from arrays_values;
[11, 12, 13, 14, 15, 16, 17, 18, , 20, 2, 2]
[21, 22, 23, , 25, 26, 27, 28, 29, 30, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]
[31, 32, 33, 34, 35, , 37, 38, 39, 40, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4]
[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5]
NULL
[]
[51, 52, , 54, 55, 56, 57, 58, 59, 60, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]
[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7]
Expand All @@ -6997,7 +6997,7 @@ select array_resize(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) f
[11, 12, 13, 14, 15, 16, 17, 18, , 20, 2, 2]
[21, 22, 23, , 25, 26, 27, 28, 29, 30, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]
[31, 32, 33, 34, 35, , 37, 38, 39, 40, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4]
[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5]
NULL
[]
[51, 52, , 54, 55, 56, 57, 58, 59, 60, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]
[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7]
Expand All @@ -7013,6 +7013,51 @@ select array_resize(arrow_cast([[1], [2], [3]], 'LargeList(List(Int64))'), 10, [
----
[[1], [2], [3], [5], [5], [5], [5], [5], [5], [5]]

# array_resize on a NULL list input returns NULL
query ?
select array_resize(arrow_cast(NULL, 'List(Int8)'), 1);
----
NULL

statement ok
CREATE TABLE array_resize_values
AS VALUES
(make_array(1, NULL, 3, 4, 5, 6, 7, 8, 9, 10), 2, 1),
(make_array(11, 12, NULL, 14, 15, 16, 17, 18, 19, 20), 5, 2),
(make_array(21, 22, 23, 24, NULL, 26, 27, 28, 29, 30), 8, 3),
(make_array(31, 32, 33, 34, 35, 36, NULL, 38, 39, 40), 12, 4),
(NULL, 3, 0),
(make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), NULL, 6),
(make_array(51, 52, 53, 54, 55, NULL, 57, 58, 59, 60), 13, NULL),
(make_array(61, 62, 63, 64, 65, 66, 67, 68, 69, 70), 15, 7)
;

# array_resize columnar test #1: List input, with rows covering a NULL list, a NULL size, and a NULL fill value
query ?
select array_resize(column1, column2, column3) from array_resize_values;
----
[1, ]
[11, 12, , 14, 15]
[21, 22, 23, 24, , 26, 27, 28]
[31, 32, 33, 34, 35, 36, , 38, 39, 40, 4, 4]
NULL
[]
[51, 52, 53, 54, 55, , 57, 58, 59, 60, , , ]
[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7]

# array_resize columnar test #2: same rows as test #1, with column1 cast to LargeList(Int64)
query ?
select array_resize(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) from array_resize_values;
----
[1, ]
[11, 12, , 14, 15]
[21, 22, 23, 24, , 26, 27, 28]
[31, 32, 33, 34, 35, 36, , 38, 39, 40, 4, 4]
NULL
[]
[51, 52, 53, 54, 55, , 57, 58, 59, 60, , , ]
[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7]

## array_reverse
query ??
select array_reverse(make_array(1, 2, 3)), array_reverse(make_array(1));
Expand Down

0 comments on commit 24d953e

Please sign in to comment.