Skip to content

Commit

Permalink
update according to PR comments
Browse files Browse the repository at this point in the history
- DISTINCT_COUNT_APPROX should be added to keywordsCanBeId

Signed-off-by: YANGDB <[email protected]>
  • Loading branch information
YANG-DB committed Nov 11, 2024
1 parent b7f0855 commit 0ae73e4
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 26 deletions.
3 changes: 2 additions & 1 deletion docs/ppl-lang/ppl-rare-command.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Using ``rare`` command to find the least common tuple of values of all fields in
* N: number of results to return. **Default**: 10
* field-list: mandatory. comma-delimited list of field names.
* by-clause: optional. one or more fields to group the results by.
* top_approx: approximate the count by using estimated [cardinality by HyperLogLog++ algorithm](https://spark.apache.org/docs/3.5.2/sql-ref-functions-builtin.html).
* rare_approx: approximates the count of the rare (n) fields by using the estimated [cardinality computed by the HyperLogLog++ algorithm](https://spark.apache.org/docs/3.5.2/sql-ref-functions-builtin.html).


### Example 1: Find the least common values in a field
Expand All @@ -22,6 +22,7 @@ The example finds least common gender of all the accounts.
PPL query:

os> source=accounts | rare gender;
os> source=accounts | rare_approx 10 gender;
os> source=accounts | rare_approx gender;
fetched rows / total rows = 2/2
+----------+
Expand Down
6 changes: 3 additions & 3 deletions docs/ppl-lang/ppl-top-command.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Using ``top`` command to find the most common tuple of values of all fields in t
* N: number of results to return. **Default**: 10
* field-list: mandatory. comma-delimited list of field names.
* by-clause: optional. one or more fields to group the results by.
* top_approx: approximate the count by using estimated [cardinality by HyperLogLog++ algorithm](https://spark.apache.org/docs/3.5.2/sql-ref-functions-builtin.html).
* top_approx: approximates the count of the top (n) fields by using the estimated [cardinality computed by the HyperLogLog++ algorithm](https://spark.apache.org/docs/3.5.2/sql-ref-functions-builtin.html).

### Example 1: Find the most common values in a field

Expand All @@ -20,7 +20,7 @@ The example finds most common gender of all the accounts.
PPL query:

os> source=accounts | top gender;
os> source=accounts_approx | top gender;
os> source=accounts | top_approx gender;
fetched rows / total rows = 2/2
+----------+
| gender |
Expand All @@ -35,7 +35,7 @@ The example finds most common gender of all the accounts.

PPL query:

os> source=accounts_approx | top 1 gender;
os> source=accounts | top_approx 1 gender;
fetched rows / total rows = 1/1
+----------+
| gender |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ EVAL: 'EVAL';
HEAD: 'HEAD';
TOP_APPROX: 'TOP_APPROX';
TOP: 'TOP';
RARE_APPROX: 'RARE_APPROX';
RARE_APPROX: 'RARE_APPROX';
RARE: 'RARE';
PARSE: 'PARSE';
METHOD: 'METHOD';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1124,6 +1124,7 @@ keywordsCanBeId
// AGGREGATIONS
| statsFunctionName
| DISTINCT_COUNT
| DISTINCT_COUNT_APPROX
| PERCENTILE
| PERCENTILE_APPROX
| ESTDC
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,13 +188,6 @@ public LogicalPlan reduce(BiFunction<LogicalPlan, LogicalPlan, LogicalPlan> tran
return result;
}).orElse(getPlan()));
}

/**
 * Applies the supplied action to this context and hands back whatever the action returns.
 *
 * @param action operator that receives this context instance
 * @return the context produced by {@code action}
 */
public CatalystPlanContext update(UnaryOperator<CatalystPlanContext> action) {
    final CatalystPlanContext updated = action.apply(this);
    return updated;
}

/**
* apply for each plan with the given function
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -370,21 +370,19 @@ public LogicalPlan visitAlias(Alias node, CatalystPlanContext context) {

@Override
public LogicalPlan visitProject(Project node, CatalystPlanContext context) {
context.update((ctx) -> {
if (node.isExcluded()) {
List<UnresolvedExpression> intersect = ctx.getProjectedFields().stream()
.filter(node.getProjectList()::contains)
.collect(Collectors.toList());
if (!intersect.isEmpty()) {
// Fields are in the parent projection, but they have been excluded in the child. For example,
// source=t | fields - A, B | fields A, B, C will throw "[Field A, Field B] can't be resolved"
throw new SyntaxCheckException(intersect + " can't be resolved");
}
} else {
ctx.withProjectedFields(node.getProjectList());
//update plan's context prior to visiting node children
if (node.isExcluded()) {
List<UnresolvedExpression> intersect = context.getProjectedFields().stream()
.filter(node.getProjectList()::contains)
.collect(Collectors.toList());
if (!intersect.isEmpty()) {
// Fields are in the parent projection, but they have been excluded in the child. For example,
// source=t | fields - A, B | fields A, B, C will throw "[Field A, Field B] can't be resolved"
throw new SyntaxCheckException(intersect + " can't be resolved");
}
return ctx;
});
} else {
context.withProjectedFields(node.getProjectList());
}
LogicalPlan child = visitFirstChild(node, context);
visitExpressionList(node.getProjectList(), context);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@
import java.util.Map;
import java.util.function.Function;

import static org.opensearch.flint.spark.ppl.OpenSearchPPLLexer.DISTINCT_COUNT_APPROX;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ADD;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ADDDATE;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.APPROX_COUNT_DISTINCT;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY_LENGTH;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DATEDIFF;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DATE_ADD;
Expand Down Expand Up @@ -109,6 +111,7 @@ public interface BuiltinFunctionTransformer {
.put(TO_JSON_STRING, "to_json")
.put(JSON_KEYS, "json_object_keys")
.put(JSON_EXTRACT, "get_json_object")
.put(APPROX_COUNT_DISTINCT, "approx_count_distinct")
.build();

/**
Expand Down

0 comments on commit 0ae73e4

Please sign in to comment.