Skip to content

Commit

Permalink
recluster the small blocks
Browse files Browse the repository at this point in the history
  • Loading branch information
zhyass committed Sep 23, 2024
1 parent 7a8c7c9 commit 700bd47
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 5 deletions.
4 changes: 2 additions & 2 deletions src/query/expression/src/utils/block_thresholds.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ impl BlockThresholds {
}

#[inline]
- pub fn check_for_recluster(&self, total_rows: usize, total_bytes: usize) -> bool {
- total_rows <= self.max_rows_per_block && total_bytes <= self.max_bytes_per_block
+ pub fn check_too_small(&self, row_count: usize, block_size: usize) -> bool {
+ row_count < self.min_rows_per_block / 2 && block_size < self.max_bytes_per_block / 2
}

#[inline]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ impl ReclusterMutator {

let mut total_rows = 0;
let mut total_bytes = 0;
+ let mut small_blocks = Vec::new();
let mut points_map: HashMap<Vec<Scalar>, (Vec<usize>, Vec<usize>)> = HashMap::new();
for i in indices.iter() {
if let Some(stats) = &blocks[*i].cluster_stats {
Expand All @@ -225,14 +226,21 @@ impl ReclusterMutator {
.and_modify(|v| v.1.push(*i))
.or_insert((vec![], vec![*i]));
}
+ // Record block fragments for compact.
+ if self.block_thresholds.check_too_small(
+ blocks[*i].row_count as usize,
+ blocks[*i].block_size as usize,
+ ) {
+ small_blocks.push(*i);
+ }
total_rows += blocks[*i].row_count;
total_bytes += blocks[*i].block_size;
}

// If the statistics of blocks are too small, just merge them into one block.
if self
.block_thresholds
- .check_for_recluster(total_rows as usize, total_bytes as usize)
+ .check_for_compact(total_rows as usize, total_bytes as usize)
{
debug!(
"recluster: the statistics of blocks are too small, just merge them into one block"
Expand All @@ -252,10 +260,13 @@ impl ReclusterMutator {
break;
}

- let selected_idx =
+ let mut selected_idx =
  self.fetch_max_depth(points_map, self.depth_threshold, max_blocks_num)?;
  if selected_idx.is_empty() {
- continue;
+ if level != 0 || small_blocks.len() < 2 {
+ continue;
+ }
+ selected_idx = IndexSet::from_iter(small_blocks);
}

let mut task_bytes = 0;
Expand Down

0 comments on commit 700bd47

Please sign in to comment.