Skip to content

Commit

Permalink
Rename to rank_quantile
Browse files Browse the repository at this point in the history
  • Loading branch information
pitrou committed Jan 20, 2025
1 parent e6618d6 commit 8c59c76
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 139 deletions.
18 changes: 9 additions & 9 deletions cpp/src/arrow/compute/api_vector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ using compute::DictionaryEncodeOptions;
using compute::FilterOptions;
using compute::NullPlacement;
using compute::RankOptions;
using compute::RankPercentileOptions;
using compute::RankQuantileOptions;

template <>
struct EnumTraits<FilterOptions::NullSelectionBehavior>
Expand Down Expand Up @@ -152,10 +152,10 @@ static auto kRankOptionsType = GetFunctionOptionsType<RankOptions>(
DataMember("sort_keys", &RankOptions::sort_keys),
DataMember("null_placement", &RankOptions::null_placement),
DataMember("tiebreaker", &RankOptions::tiebreaker));
static auto kRankPercentileOptionsType = GetFunctionOptionsType<RankPercentileOptions>(
DataMember("sort_keys", &RankPercentileOptions::sort_keys),
DataMember("null_placement", &RankPercentileOptions::null_placement),
DataMember("factor", &RankPercentileOptions::factor));
// Options-type descriptor for RankQuantileOptions, built from its three data
// members ("sort_keys", "null_placement", "factor"), each paired with the
// string name it is exposed under. The RankQuantileOptions constructor hands
// this object to its FunctionOptions base.
static auto kRankQuantileOptionsType = GetFunctionOptionsType<RankQuantileOptions>(
DataMember("sort_keys", &RankQuantileOptions::sort_keys),
DataMember("null_placement", &RankQuantileOptions::null_placement),
DataMember("factor", &RankQuantileOptions::factor));
static auto kPairwiseOptionsType = GetFunctionOptionsType<PairwiseOptions>(
DataMember("periods", &PairwiseOptions::periods));
static auto kListFlattenOptionsType = GetFunctionOptionsType<ListFlattenOptions>(
Expand Down Expand Up @@ -233,13 +233,13 @@ RankOptions::RankOptions(std::vector<SortKey> sort_keys, NullPlacement null_plac
tiebreaker(tiebreaker) {}
constexpr char RankOptions::kTypeName[];

RankPercentileOptions::RankPercentileOptions(std::vector<SortKey> sort_keys,
NullPlacement null_placement, double factor)
: FunctionOptions(internal::kRankPercentileOptionsType),
// Constructs quantile-rank options.
// The FunctionOptions base is initialized with the RankQuantileOptions type
// descriptor; `sort_keys` is taken by value and moved into the member, while
// `null_placement` and `factor` are stored as given.
RankQuantileOptions::RankQuantileOptions(std::vector<SortKey> sort_keys,
NullPlacement null_placement, double factor)
: FunctionOptions(internal::kRankQuantileOptionsType),
sort_keys(std::move(sort_keys)),
null_placement(null_placement),
factor(factor) {}
constexpr char RankPercentileOptions::kTypeName[];
// Out-of-line definition of the static constexpr member declared (with its
// initializer) in the class. Required for ODR-use before C++17; from C++17 on
// such members are implicitly inline and this redeclaration is redundant.
constexpr char RankQuantileOptions::kTypeName[];

PairwiseOptions::PairwiseOptions(int64_t periods)
: FunctionOptions(internal::kPairwiseOptionsType), periods(periods) {}
Expand Down
22 changes: 11 additions & 11 deletions cpp/src/arrow/compute/api_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,20 +195,20 @@ class ARROW_EXPORT RankOptions : public FunctionOptions {
Tiebreaker tiebreaker;
};

/// \brief Percentile rank options
class ARROW_EXPORT RankPercentileOptions : public FunctionOptions {
/// \brief Quantile rank options
class ARROW_EXPORT RankQuantileOptions : public FunctionOptions {
public:
explicit RankPercentileOptions(std::vector<SortKey> sort_keys = {},
NullPlacement null_placement = NullPlacement::AtEnd,
double factor = 1.0);
explicit RankQuantileOptions(std::vector<SortKey> sort_keys = {},
NullPlacement null_placement = NullPlacement::AtEnd,
double factor = 1.0);
/// Convenience constructor for array inputs
explicit RankPercentileOptions(SortOrder order,
NullPlacement null_placement = NullPlacement::AtEnd,
double factor = 1.0)
: RankPercentileOptions({SortKey("", order)}, null_placement, factor) {}
explicit RankQuantileOptions(SortOrder order,
NullPlacement null_placement = NullPlacement::AtEnd,
double factor = 1.0)
: RankQuantileOptions({SortKey("", order)}, null_placement, factor) {}

static constexpr char const kTypeName[] = "RankPercentileOptions";
static RankPercentileOptions Defaults() { return RankPercentileOptions(); }
static constexpr char const kTypeName[] = "RankQuantileOptions";
static RankQuantileOptions Defaults() { return RankQuantileOptions(); }

/// Column key(s) to order by and how to order by these sort keys.
std::vector<SortKey> sort_keys;
Expand Down
50 changes: 24 additions & 26 deletions cpp/src/arrow/compute/kernels/vector_rank.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
#include "arrow/compute/function.h"
#include "arrow/compute/kernels/vector_sort_internal.h"
#include "arrow/compute/registry.h"
#include "arrow/util/logging.h"

namespace arrow::compute::internal {

Expand Down Expand Up @@ -68,9 +67,9 @@ const RankOptions* GetDefaultRankOptions() {
return &kDefaultRankOptions;
}

const RankPercentileOptions* GetDefaultPercentileRankOptions() {
static const auto kDefaultPercentileRankOptions = RankPercentileOptions::Defaults();
return &kDefaultPercentileRankOptions;
// Returns a pointer to the shared default RankQuantileOptions instance.
// The instance is a function-local static initialized from
// RankQuantileOptions::Defaults() on the first call, so the returned pointer
// stays valid for the remainder of the program.
const RankQuantileOptions* GetDefaultQuantileRankOptions() {
  static const RankQuantileOptions kDefaults = RankQuantileOptions::Defaults();
  return &kDefaults;
}

template <typename ArrowType>
Expand Down Expand Up @@ -165,9 +164,9 @@ class SortAndMarkDuplicate : public TypeVisitor {
NullPartitionResult sorted_{};
};

// A helper class that emits rankings for the "rank_percentile" function
struct PercentileRanker {
explicit PercentileRanker(double factor) : factor_(factor) {}
// A helper class that emits rankings for the "rank_quantile" function
struct QuantileRanker {
explicit QuantileRanker(double factor) : factor_(factor) {}

Result<Datum> CreateRankings(ExecContext* ctx, const NullPartitionResult& sorted) {
const int64_t length = sorted.overall_end() - sorted.overall_begin();
Expand All @@ -190,10 +189,10 @@ struct PercentileRanker {
}
// The run length, i.e. the frequency of the current value
int64_t freq = run_end - it;
double percentile = (cum_freq + 0.5 * freq) * factor_ / static_cast<double>(length);
// Output percentile rank values
double quantile = (cum_freq + 0.5 * freq) * factor_ / static_cast<double>(length);
// Output quantile rank values
for (; it < run_end; ++it) {
out_begin[original_index(*it)] = percentile;
out_begin[original_index(*it)] = quantile;
}
cum_freq += freq;
}
Expand Down Expand Up @@ -286,18 +285,18 @@ const FunctionDoc rank_doc(
"The handling of nulls, NaNs and tiebreakers can be changed in RankOptions."),
{"input"}, "RankOptions");

const FunctionDoc rank_percentile_doc(
"Compute percentile ranks of an array",
("This function computes a percentile rank of the input array.\n"
const FunctionDoc rank_quantile_doc(
"Compute quantile ranks of an array",
("This function computes a quantile rank of the input array.\n"
"By default, null values are considered greater than any other value and\n"
"are therefore sorted at the end of the input. For floating-point types,\n"
"NaNs are considered greater than any other non-null value, but smaller\n"
"than null values.\n"
"Results are computed as in https://en.wikipedia.org/wiki/Percentile_rank\n"
"Results are computed as in https://en.wikipedia.org/wiki/Percentile_rank\n"
"but multiplied by the constant factor instead of 100.\n"
"\n"
"The handling of nulls and NaNs, and the constant factor can be changed\n"
"in RankPercentileOptions."),
{"input"}, "RankPercentileOptions");
"in RankQuantileOptions."),
{"input"}, "RankQuantileOptions");

template <typename Derived>
class RankMetaFunctionBase : public MetaFunction {
Expand Down Expand Up @@ -369,28 +368,27 @@ class RankMetaFunction : public RankMetaFunctionBase<RankMetaFunction> {
: RankMetaFunctionBase("rank", Arity::Unary(), rank_doc, GetDefaultRankOptions()) {}
};

class RankPercentileMetaFunction
: public RankMetaFunctionBase<RankPercentileMetaFunction> {
class RankQuantileMetaFunction : public RankMetaFunctionBase<RankQuantileMetaFunction> {
public:
using FunctionOptionsType = RankPercentileOptions;
using RankerType = PercentileRanker;
using FunctionOptionsType = RankQuantileOptions;
using RankerType = QuantileRanker;

static bool NeedsDuplicates(const RankPercentileOptions&) { return true; }
static bool NeedsDuplicates(const RankQuantileOptions&) { return true; }

static RankerType GetRanker(const RankPercentileOptions& options) {
static RankerType GetRanker(const RankQuantileOptions& options) {
return RankerType(options.factor);
}

RankPercentileMetaFunction()
: RankMetaFunctionBase("rank_percentile", Arity::Unary(), rank_percentile_doc,
GetDefaultPercentileRankOptions()) {}
RankQuantileMetaFunction()
: RankMetaFunctionBase("rank_quantile", Arity::Unary(), rank_quantile_doc,
GetDefaultQuantileRankOptions()) {}
};

} // namespace

void RegisterVectorRank(FunctionRegistry* registry) {
DCHECK_OK(registry->AddFunction(std::make_shared<RankMetaFunction>()));
DCHECK_OK(registry->AddFunction(std::make_shared<RankPercentileMetaFunction>()));
DCHECK_OK(registry->AddFunction(std::make_shared<RankQuantileMetaFunction>()));
}

} // namespace arrow::compute::internal
Loading

0 comments on commit 8c59c76

Please sign in to comment.