Skip to content

Commit

Permalink
fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathanc-n committed Oct 21, 2024
1 parent 067cc29 commit f25090f
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 19 deletions.
32 changes: 15 additions & 17 deletions datafusion/functions-window/src/cume_dist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,9 @@ fn get_cume_dist_doc() -> &'static Documentation {
Documentation::builder()
.with_doc_section(DOC_SECTION_RANKING)
.with_description(
"Number of the current row within its partition, counting from 1.",
"Relative rank of the current row: (number of rows preceding or peer with current row) / (total rows).",
)
.with_syntax_example("row_number()")
.with_syntax_example("cume_dist()")
.build()
.unwrap()
})
Expand All @@ -115,22 +115,20 @@ impl PartitionEvaluator for CumeDistEvaluator {
num_rows: usize,
ranks_in_partition: &[Range<usize>],
) -> Result<ArrayRef> {
if num_rows == 0 {
return Ok(Arc::new(Float64Array::from(Vec::<f64>::new())));
}

let scalar = num_rows as f64;
let mut cumulative = 0_u64;
let mut values = Vec::with_capacity(num_rows);

for range in ranks_in_partition {
let len = range.end - range.start;
cumulative += len as u64;
let value = cumulative as f64 / scalar;
values.extend(iter::repeat(value).take(len));
}

Ok(Arc::new(Float64Array::from(values)))
let result = Float64Array::from_iter_values(
ranks_in_partition
.iter()
.scan(0_u64, |acc, range| {
let len = range.end - range.start;
*acc += len as u64;
let value: f64 = (*acc as f64) / scalar;
let result = iter::repeat(value).take(len);
Some(result)
})
.flatten(),
);
Ok(Arc::new(result))
}

fn include_rank(&self) -> bool {
Expand Down
2 changes: 1 addition & 1 deletion datafusion/proto/src/generated/prost.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion datafusion/proto/tests/cases/roundtrip_logical_plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ use datafusion::functions_aggregate::expr_fn::{
use datafusion::functions_aggregate::min_max::max_udaf;
use datafusion::functions_nested::map::map;
use datafusion::functions_window::expr_fn::{
cume_dist, dense_rank, lag, lead, percent_rank, rank, row_number,
dense_rank, lag, lead, percent_rank, rank, row_number,
};
use datafusion::functions_window::rank::rank_udwf;
use datafusion::prelude::*;
Expand Down
9 changes: 9 additions & 0 deletions docs/source/user-guide/sql/window_functions_new.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,11 +157,20 @@ All [aggregate functions](aggregate_functions.md) can be used as window function

## Ranking Functions

- [cume_dist](#cume_dist)
- [dense_rank](#dense_rank)
- [percent_rank](#percent_rank)
- [rank](#rank)
- [row_number](#row_number)

### `cume_dist`

Relative rank of the current row: (number of rows preceding or peer with current row) / (total rows).

```
cume_dist()
```

### `dense_rank`

Returns the rank of the current row without gaps. This function ranks rows in a dense manner, meaning consecutive ranks are assigned even for identical values.
Expand Down

0 comments on commit f25090f

Please sign in to comment.