From 80ade4fabab59e2950024c0bbeb5cd45d28909c6 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Tue, 12 Nov 2024 20:24:35 +0800 Subject: [PATCH 1/7] chore(ci): enable debug_assertions in release build ci --- .github/actions/build_linux/action.yml | 6 ++++++ .github/workflows/reuse.linux.yml | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/actions/build_linux/action.yml b/.github/actions/build_linux/action.yml index 5b032f29b704..6941f69ef6e1 100644 --- a/.github/actions/build_linux/action.yml +++ b/.github/actions/build_linux/action.yml @@ -58,6 +58,12 @@ runs: flags="" ;; esac + + debug_assertions=${{ inputs.debug_assertions }} + if [[ $debug_assertions -eq 1 ]]; then + flags="--cfg debug_assertions ${flags}" + fi + if [[ ! -z "${flags}" ]]; then echo "RUSTFLAGS=${flags}" >> $GITHUB_ENV fi diff --git a/.github/workflows/reuse.linux.yml b/.github/workflows/reuse.linux.yml index 116c6c719995..2890f2ce6717 100644 --- a/.github/workflows/reuse.linux.yml +++ b/.github/workflows/reuse.linux.yml @@ -7,7 +7,7 @@ on: description: "Build profile, debug or release" type: string required: true - default: "debug" + default: "release" runner_provider: description: "Self-hosted runner provider, aws or gcp" type: string @@ -60,6 +60,7 @@ jobs: with: sha: ${{ github.sha }} target: ${{ matrix.arch }}-unknown-linux-gnu + debug_assertions: 1 artifacts: all build_udf: From e45f44f6c6e764fc98f72e9163b51c75c50e9fd1 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 15 Nov 2024 15:54:55 +0800 Subject: [PATCH 2/7] fix(query): fix and check total_buffer_len and total_bytes_len --- src/common/arrow/src/arrow/array/binview/mod.rs | 12 ++++++++++++ src/common/arrow/src/arrow/array/binview/mutable.rs | 11 +++++++++++ src/common/arrow/src/arrow/array/growable/binview.rs | 6 +----- src/query/expression/src/kernels/filter.rs | 2 +- src/query/expression/src/kernels/take.rs | 2 +- src/query/expression/src/kernels/take_compact.rs | 2 +- src/query/expression/src/kernels/take_ranges.rs | 2 +- 7 files changed, 28 insertions(+), 9 deletions(-) diff --git a/src/common/arrow/src/arrow/array/binview/mod.rs b/src/common/arrow/src/arrow/array/binview/mod.rs index 69bda66794b5..40bb5d7140c2 100644 --- a/src/common/arrow/src/arrow/array/binview/mod.rs +++ b/src/common/arrow/src/arrow/array/binview/mod.rs @@ -166,6 +166,18 @@ impl BinaryViewArrayGeneric { total_bytes_len: usize, total_buffer_len: usize, ) -> Self { + #[cfg(debug_assertions)] + { + if total_bytes_len != UNKNOWN_LEN as usize { + let total = views.iter().map(|v| v.length as usize).sum::(); + assert_eq!(total, total_bytes_len); + } + + if total_buffer_len != UNKNOWN_LEN as usize { + let total = buffers.iter().map(|v| v.len() as usize).sum::(); + assert_eq!(total, total_buffer_len); + } + } // # Safety // The caller must ensure // - the data is valid utf8 (if required) diff --git a/src/common/arrow/src/arrow/array/binview/mutable.rs b/src/common/arrow/src/arrow/array/binview/mutable.rs index abf2530b6a38..2a55a258a5df 100644 --- a/src/common/arrow/src/arrow/array/binview/mutable.rs +++ b/src/common/arrow/src/arrow/array/binview/mutable.rs @@ -263,12 +263,23 @@ impl MutableBinaryViewArray { // Push and pop to get the properly encoded value. // For long string this leads to a dictionary encoding, // as we push the string only once in the buffers + + let old_bytes_len = self.total_bytes_len; + let old_buffer_len = self.total_buffer_len; + let view_value = value .map(|v| { self.push_value_ignore_validity(v); self.views.pop().unwrap() }) .unwrap_or_default(); + + self.total_bytes_len += + (self.total_bytes_len - old_bytes_len) * additional.saturating_sub(1); + + self.total_buffer_len += + (self.total_buffer_len - old_buffer_len) * additional.saturating_sub(1); + self.views .extend(std::iter::repeat(view_value).take(additional)); } diff --git a/src/common/arrow/src/arrow/array/growable/binview.rs b/src/common/arrow/src/arrow/array/growable/binview.rs index a23ba22bffe4..6fa525471e82 100644 --- a/src/common/arrow/src/arrow/array/growable/binview.rs +++ b/src/common/arrow/src/arrow/array/growable/binview.rs @@ -74,7 +74,6 @@ impl<'a, T: ViewType + ?Sized> GrowableBinaryViewArray<'a, T> { capacity: usize, ) -> Self { let data_type = arrays[0].data_type().clone(); - // if any of the arrays has nulls, insertions from any array requires setting bits // as there is at least one array with nulls. if !use_validity & arrays.iter().any(|array| array.null_count() > 0) { @@ -91,11 +90,8 @@ impl<'a, T: ViewType + ?Sized> GrowableBinaryViewArray<'a, T> { .map(|buf| BufferKey { inner: buf }) }) .collect::>(); - let total_buffer_len = arrays - .iter() - .map(|arr| arr.data_buffers().len()) - .sum::(); + let total_buffer_len = buffers.iter().map(|v| v.inner.len()).sum(); Self { arrays, data_type, diff --git a/src/query/expression/src/kernels/filter.rs b/src/query/expression/src/kernels/filter.rs index 11bfffe08812..1af12f7b046f 100644 --- a/src/query/expression/src/kernels/filter.rs +++ b/src/query/expression/src/kernels/filter.rs @@ -358,7 +358,7 @@ impl<'a> FilterVisitor<'a> { new_views, values.data.data_buffers().clone(), None, - Some(values.data.total_buffer_len()), + None, ) }; StringColumn::new(new_col) diff --git a/src/query/expression/src/kernels/take.rs b/src/query/expression/src/kernels/take.rs index f4957a1c19f3..7c12444d99ce 100644 --- a/src/query/expression/src/kernels/take.rs +++ b/src/query/expression/src/kernels/take.rs @@ -292,7 +292,7 @@ where I: databend_common_arrow::arrow::types::Index new_views, col.data.data_buffers().clone(), None, - Some(col.data.total_buffer_len()), + None, ) }; StringColumn::new(new_col) diff --git a/src/query/expression/src/kernels/take_compact.rs b/src/query/expression/src/kernels/take_compact.rs index 2cf400264b6d..3353f467c5fe 100644 --- a/src/query/expression/src/kernels/take_compact.rs +++ b/src/query/expression/src/kernels/take_compact.rs @@ -238,7 +238,7 @@ impl<'a> TakeCompactVisitor<'a> { new_views, col.data.data_buffers().clone(), None, - Some(col.data.total_buffer_len()), + None, ) }; StringColumn::new(new_col) diff --git a/src/query/expression/src/kernels/take_ranges.rs b/src/query/expression/src/kernels/take_ranges.rs index 872f3f5829ef..f8ed6e1d84f5 100644 --- a/src/query/expression/src/kernels/take_ranges.rs +++ b/src/query/expression/src/kernels/take_ranges.rs @@ -239,7 +239,7 @@ impl<'a> TakeRangeVisitor<'a> { new_views, col.data.data_buffers().clone(), None, - Some(col.data.total_buffer_len()), + None, ) }; StringColumn::new(new_col) From 9d3dabd08b860dd63e2f2574d62cdaa707e24068 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 15 Nov 2024 16:07:24 +0800 Subject: [PATCH 3/7] fix(query): fix and check total_buffer_len and total_bytes_len --- src/common/arrow/src/arrow/array/binview/mod.rs | 2 +- .../sqllogictests/suites/query/join/left_outer.test | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/common/arrow/src/arrow/array/binview/mod.rs b/src/common/arrow/src/arrow/array/binview/mod.rs index 40bb5d7140c2..2d0ebc49e5b9 100644 --- a/src/common/arrow/src/arrow/array/binview/mod.rs +++ b/src/common/arrow/src/arrow/array/binview/mod.rs @@ -174,7 +174,7 @@ impl BinaryViewArrayGeneric { } if total_buffer_len != UNKNOWN_LEN as usize { - let total = buffers.iter().map(|v| v.len() as usize).sum::(); + let total = buffers.iter().map(|v| v.len()).sum::(); assert_eq!(total, total_buffer_len); } } diff --git a/tests/sqllogictests/suites/query/join/left_outer.test b/tests/sqllogictests/suites/query/join/left_outer.test index ec53a1887605..d8edb290f8da 100644 --- a/tests/sqllogictests/suites/query/join/left_outer.test +++ b/tests/sqllogictests/suites/query/join/left_outer.test @@ -298,8 +298,18 @@ SELECT * FROM t1 LEFT JOIN t2 ON t1.a = t2.a; 4 5 NULL NULL 5 6 NULL NULL + +statement ok +create or replace table t1 (a string) as select number from numbers(100); + +statement ok +create or replace table t2 (a string) as select number from numbers(100); + +## just check it works or not statement ok -set max_block_size = 65536; +select * from ( + select 'SN0LL' as k from t1 +) as a1 left join (select * from t2) as a2 on a1.k = a2.a; statement ok DROP TABLE IF EXISTS t1; From 274c7f030626c3aeacbf6bfd4d3cc19ebfc87c78 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 15 Nov 2024 16:19:14 +0800 Subject: [PATCH 4/7] reset ci build --- .github/actions/build_linux/action.yml | 6 ------ .github/workflows/reuse.linux.yml | 3 +-- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/.github/actions/build_linux/action.yml b/.github/actions/build_linux/action.yml index 6941f69ef6e1..5b032f29b704 100644 --- a/.github/actions/build_linux/action.yml +++ b/.github/actions/build_linux/action.yml @@ -58,12 +58,6 @@ runs: flags="" ;; esac - - debug_assertions=${{ inputs.debug_assertions }} - if [[ $debug_assertions -eq 1 ]]; then - flags="--cfg debug_assertions ${flags}" - fi - if [[ ! -z "${flags}" ]]; then echo "RUSTFLAGS=${flags}" >> $GITHUB_ENV fi diff --git a/.github/workflows/reuse.linux.yml b/.github/workflows/reuse.linux.yml index 2890f2ce6717..116c6c719995 100644 --- a/.github/workflows/reuse.linux.yml +++ b/.github/workflows/reuse.linux.yml @@ -7,7 +7,7 @@ on: description: "Build profile, debug or release" type: string required: true - default: "release" + default: "debug" runner_provider: description: "Self-hosted runner provider, aws or gcp" type: string @@ -60,7 +60,6 @@ jobs: with: sha: ${{ github.sha }} target: ${{ matrix.arch }}-unknown-linux-gnu - debug_assertions: 1 artifacts: all build_udf: From ef927ec81ab030d7c3696fba01c327e8b5c01203 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 15 Nov 2024 17:11:57 +0800 Subject: [PATCH 5/7] update --- src/common/arrow/src/arrow/array/binview/mutable.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/arrow/src/arrow/array/binview/mutable.rs b/src/common/arrow/src/arrow/array/binview/mutable.rs index 2a55a258a5df..47edae716049 100644 --- a/src/common/arrow/src/arrow/array/binview/mutable.rs +++ b/src/common/arrow/src/arrow/array/binview/mutable.rs @@ -155,8 +155,8 @@ impl MutableBinaryViewArray { #[inline] pub(crate) unsafe fn push_view_unchecked(&mut self, v: View, buffers: &[Buffer]) { let len = v.length; - self.total_bytes_len += len as usize; if len <= 12 { + self.total_bytes_len += len as usize; debug_assert!(self.views.capacity() > self.views.len()); self.views.push(v) } else { From a7ce16b22c0691909d3d87bfe96b39ba288336f9 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 15 Nov 2024 18:11:36 +0800 Subject: [PATCH 6/7] update --- .../arrow/src/arrow/array/binview/mutable.rs | 4 ---- .../tests/it/arrow/array/binview/mutable_values.rs | 14 ++++++++++++++ .../base/01_system/01_0001_system_tables.test | 6 ++++++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/common/arrow/src/arrow/array/binview/mutable.rs b/src/common/arrow/src/arrow/array/binview/mutable.rs index 47edae716049..d86bf21fe4b0 100644 --- a/src/common/arrow/src/arrow/array/binview/mutable.rs +++ b/src/common/arrow/src/arrow/array/binview/mutable.rs @@ -265,7 +265,6 @@ impl MutableBinaryViewArray { // as we push the string only once in the buffers let old_bytes_len = self.total_bytes_len; - let old_buffer_len = self.total_buffer_len; let view_value = value .map(|v| { @@ -277,9 +276,6 @@ impl MutableBinaryViewArray { self.total_bytes_len += (self.total_bytes_len - old_bytes_len) * additional.saturating_sub(1); - self.total_buffer_len += - (self.total_buffer_len - old_buffer_len) * additional.saturating_sub(1); - self.views .extend(std::iter::repeat(view_value).take(additional)); } diff --git a/src/common/arrow/tests/it/arrow/array/binview/mutable_values.rs b/src/common/arrow/tests/it/arrow/array/binview/mutable_values.rs index 0c23a157f65c..e0384ad7bd39 100644 --- a/src/common/arrow/tests/it/arrow/array/binview/mutable_values.rs +++ b/src/common/arrow/tests/it/arrow/array/binview/mutable_values.rs @@ -29,3 +29,17 @@ fn extend_from_iter() { .as_box() ) } + +#[test] +fn extend_from_repeats() { + let mut b = MutableBinaryViewArray::::new(); + b.extend_constant(4, Some("databend")); + + let a = b.clone(); + b.extend_trusted_len_values(a.values_iter()); + + assert_eq!( + b.as_box(), + MutableBinaryViewArray::::from_values_iter(vec!["databend"; 8].into_iter()).as_box() + ) +} diff --git a/tests/sqllogictests/suites/base/01_system/01_0001_system_tables.test b/tests/sqllogictests/suites/base/01_system/01_0001_system_tables.test index f07c0150dfcb..8a0dfda36bfd 100644 --- a/tests/sqllogictests/suites/base/01_system/01_0001_system_tables.test +++ b/tests/sqllogictests/suites/base/01_system/01_0001_system_tables.test @@ -71,6 +71,12 @@ select name, database, owner from system.tables where database='c' and name ='t1 ---- t100 c account_admin +statement ok +select * from system.malloc_stats_totals; + +statement ok +select * from system.malloc_stats; + statement ok drop database if exists a; From cf53812c33fd1f0f6bdc5ae6cfcd0b7ed7bf7333 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 15 Nov 2024 19:22:47 +0800 Subject: [PATCH 7/7] update --- src/common/arrow/src/arrow/array/binview/mutable.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/common/arrow/src/arrow/array/binview/mutable.rs b/src/common/arrow/src/arrow/array/binview/mutable.rs index d86bf21fe4b0..119fa085bd08 100644 --- a/src/common/arrow/src/arrow/array/binview/mutable.rs +++ b/src/common/arrow/src/arrow/array/binview/mutable.rs @@ -160,7 +160,6 @@ impl MutableBinaryViewArray { debug_assert!(self.views.capacity() > self.views.len()); self.views.push(v) } else { - self.total_buffer_len += len as usize; let data = buffers.get_unchecked(v.buffer_idx as usize); let offset = v.offset as usize; let bytes = data.get_unchecked(offset..offset + len as usize);