diff --git a/src/common/arrow/src/arrow/array/binview/mod.rs b/src/common/arrow/src/arrow/array/binview/mod.rs index 69bda66794b5..2d0ebc49e5b9 100644 --- a/src/common/arrow/src/arrow/array/binview/mod.rs +++ b/src/common/arrow/src/arrow/array/binview/mod.rs @@ -166,6 +166,18 @@ impl BinaryViewArrayGeneric { total_bytes_len: usize, total_buffer_len: usize, ) -> Self { + #[cfg(debug_assertions)] + { + if total_bytes_len != UNKNOWN_LEN as usize { + let total = views.iter().map(|v| v.length as usize).sum::(); + assert_eq!(total, total_bytes_len); + } + + if total_buffer_len != UNKNOWN_LEN as usize { + let total = buffers.iter().map(|v| v.len()).sum::(); + assert_eq!(total, total_buffer_len); + } + } // # Safety // The caller must ensure // - the data is valid utf8 (if required) diff --git a/src/common/arrow/src/arrow/array/binview/mutable.rs b/src/common/arrow/src/arrow/array/binview/mutable.rs index abf2530b6a38..119fa085bd08 100644 --- a/src/common/arrow/src/arrow/array/binview/mutable.rs +++ b/src/common/arrow/src/arrow/array/binview/mutable.rs @@ -155,12 +155,11 @@ impl MutableBinaryViewArray { #[inline] pub(crate) unsafe fn push_view_unchecked(&mut self, v: View, buffers: &[Buffer]) { let len = v.length; - self.total_bytes_len += len as usize; if len <= 12 { + self.total_bytes_len += len as usize; debug_assert!(self.views.capacity() > self.views.len()); self.views.push(v) } else { - self.total_buffer_len += len as usize; let data = buffers.get_unchecked(v.buffer_idx as usize); let offset = v.offset as usize; let bytes = data.get_unchecked(offset..offset + len as usize); @@ -263,12 +262,19 @@ impl MutableBinaryViewArray { // Push and pop to get the properly encoded value. // For long string this leads to a dictionary encoding, // as we push the string only once in the buffers + + let old_bytes_len = self.total_bytes_len; + let view_value = value .map(|v| { self.push_value_ignore_validity(v); self.views.pop().unwrap() }) .unwrap_or_default(); + + self.total_bytes_len += + (self.total_bytes_len - old_bytes_len) * additional.saturating_sub(1); + self.views .extend(std::iter::repeat(view_value).take(additional)); } diff --git a/src/common/arrow/src/arrow/array/growable/binview.rs b/src/common/arrow/src/arrow/array/growable/binview.rs index a23ba22bffe4..6fa525471e82 100644 --- a/src/common/arrow/src/arrow/array/growable/binview.rs +++ b/src/common/arrow/src/arrow/array/growable/binview.rs @@ -74,7 +74,6 @@ impl<'a, T: ViewType + ?Sized> GrowableBinaryViewArray<'a, T> { capacity: usize, ) -> Self { let data_type = arrays[0].data_type().clone(); - // if any of the arrays has nulls, insertions from any array requires setting bits // as there is at least one array with nulls. if !use_validity & arrays.iter().any(|array| array.null_count() > 0) { @@ -91,11 +90,8 @@ impl<'a, T: ViewType + ?Sized> GrowableBinaryViewArray<'a, T> { .map(|buf| BufferKey { inner: buf }) }) .collect::>(); - let total_buffer_len = arrays - .iter() - .map(|arr| arr.data_buffers().len()) - .sum::(); + let total_buffer_len = buffers.iter().map(|v| v.inner.len()).sum(); Self { arrays, data_type, diff --git a/src/common/arrow/tests/it/arrow/array/binview/mutable_values.rs b/src/common/arrow/tests/it/arrow/array/binview/mutable_values.rs index 0c23a157f65c..e0384ad7bd39 100644 --- a/src/common/arrow/tests/it/arrow/array/binview/mutable_values.rs +++ b/src/common/arrow/tests/it/arrow/array/binview/mutable_values.rs @@ -29,3 +29,17 @@ fn extend_from_iter() { .as_box() ) } + +#[test] +fn extend_from_repeats() { + let mut b = MutableBinaryViewArray::::new(); + b.extend_constant(4, Some("databend")); + + let a = b.clone(); + b.extend_trusted_len_values(a.values_iter()); + + assert_eq!( + b.as_box(), + MutableBinaryViewArray::::from_values_iter(vec!["databend"; 8].into_iter()).as_box() + ) +} diff --git a/src/query/expression/src/kernels/filter.rs b/src/query/expression/src/kernels/filter.rs index 11bfffe08812..1af12f7b046f 100644 --- a/src/query/expression/src/kernels/filter.rs +++ b/src/query/expression/src/kernels/filter.rs @@ -358,7 +358,7 @@ impl<'a> FilterVisitor<'a> { new_views, values.data.data_buffers().clone(), None, - Some(values.data.total_buffer_len()), + None, ) }; StringColumn::new(new_col) diff --git a/src/query/expression/src/kernels/take.rs b/src/query/expression/src/kernels/take.rs index f4957a1c19f3..7c12444d99ce 100644 --- a/src/query/expression/src/kernels/take.rs +++ b/src/query/expression/src/kernels/take.rs @@ -292,7 +292,7 @@ where I: databend_common_arrow::arrow::types::Index new_views, col.data.data_buffers().clone(), None, - Some(col.data.total_buffer_len()), + None, ) }; StringColumn::new(new_col) diff --git a/src/query/expression/src/kernels/take_compact.rs b/src/query/expression/src/kernels/take_compact.rs index 2cf400264b6d..3353f467c5fe 100644 --- a/src/query/expression/src/kernels/take_compact.rs +++ b/src/query/expression/src/kernels/take_compact.rs @@ -238,7 +238,7 @@ impl<'a> TakeCompactVisitor<'a> { new_views, col.data.data_buffers().clone(), None, - Some(col.data.total_buffer_len()), + None, ) }; StringColumn::new(new_col) diff --git a/src/query/expression/src/kernels/take_ranges.rs b/src/query/expression/src/kernels/take_ranges.rs index 872f3f5829ef..f8ed6e1d84f5 100644 --- a/src/query/expression/src/kernels/take_ranges.rs +++ b/src/query/expression/src/kernels/take_ranges.rs @@ -239,7 +239,7 @@ impl<'a> TakeRangeVisitor<'a> { new_views, col.data.data_buffers().clone(), None, - Some(col.data.total_buffer_len()), + None, ) }; StringColumn::new(new_col) diff --git a/tests/sqllogictests/suites/base/01_system/01_0001_system_tables.test b/tests/sqllogictests/suites/base/01_system/01_0001_system_tables.test index f07c0150dfcb..8a0dfda36bfd 100644 --- a/tests/sqllogictests/suites/base/01_system/01_0001_system_tables.test +++ b/tests/sqllogictests/suites/base/01_system/01_0001_system_tables.test @@ -71,6 +71,12 @@ select name, database, owner from system.tables where database='c' and name ='t1 ---- t100 c account_admin +statement ok +select * from system.malloc_stats_totals; + +statement ok +select * from system.malloc_stats; + statement ok drop database if exists a; diff --git a/tests/sqllogictests/suites/query/join/left_outer.test b/tests/sqllogictests/suites/query/join/left_outer.test index ec53a1887605..d8edb290f8da 100644 --- a/tests/sqllogictests/suites/query/join/left_outer.test +++ b/tests/sqllogictests/suites/query/join/left_outer.test @@ -298,8 +298,18 @@ SELECT * FROM t1 LEFT JOIN t2 ON t1.a = t2.a; 4 5 NULL NULL 5 6 NULL NULL + +statement ok +create or replace table t1 (a string) as select number from numbers(100); + +statement ok +create or replace table t2 (a string) as select number from numbers(100); + +## just check it works or not statement ok -set max_block_size = 65536; +select * from ( + select 'SN0LL' as k from t1 +) as a1 left join (select * from t2) as a2 on a1.k = a2.a; statement ok DROP TABLE IF EXISTS t1;