Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to sort query by field cardinality #2425

Merged
merged 10 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@
<version.microservice.common-utils>3.0.0</version.microservice.common-utils>
<version.microservice.dictionary-api>4.0.0</version.microservice.dictionary-api>
<version.microservice.mapreduce-query-api>1.0.0</version.microservice.mapreduce-query-api>
<version.microservice.metadata-utils>4.0.0</version.microservice.metadata-utils>
<version.microservice.metadata-utils>4.0.3</version.microservice.metadata-utils>
<version.microservice.metrics-reporter>3.0.0</version.microservice.metrics-reporter>
<version.microservice.query-api>1.0.0</version.microservice.query-api>
<version.microservice.query-metric-api>4.0.0</version.microservice.query-metric-api>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,9 @@ public QueryFieldsVisitor(MetadataHelper helper) {

/**
 * Records the field referenced by a single JEXL node into the accumulating set.
 * <p>
 * Some nodes carry no field identifier, in which case {@link JexlASTHelper#getIdentifier} yields {@code null}; such nodes are skipped so that the
 * collected set never contains a {@code null} entry.
 *
 * @param node
 *            the node whose identifier should be collected
 * @param data
 *            the accumulating {@code Set<String>} of field names
 * @return the same {@code data} object that was passed in
 */
private Object parseSingleField(JexlNode node, Object data) {
    String field = JexlASTHelper.getIdentifier(node);
    // only record real identifiers; a null here means the node did not reference a field
    if (field != null) {
        ((Set<String>) data).add(field);
    }
    return data;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@
import datawave.query.jexl.visitors.PushdownLowSelectivityNodesVisitor;
import datawave.query.jexl.visitors.PushdownMissingIndexRangeNodesVisitor;
import datawave.query.jexl.visitors.PushdownUnexecutableNodesVisitor;
import datawave.query.jexl.visitors.QueryFieldsVisitor;
import datawave.query.jexl.visitors.QueryModelVisitor;
import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor;
import datawave.query.jexl.visitors.QueryPropertyMarkerSourceConsolidator;
Expand Down Expand Up @@ -2704,7 +2705,7 @@ public Tuple2<CloseableIterable<QueryPlan>,Boolean> getQueryRanges(ScannerFactor
}

if (config.isSortQueryBeforeGlobalIndex()) {
queryTree = OrderByCostVisitor.order((ASTJexlScript) queryTree);
config.setQueryTree(timedSortQueryBeforeGlobalIndex(config, getMetadataHelper()));
}

// if a simple examination of the query has not forced a full table
Expand Down Expand Up @@ -2791,6 +2792,20 @@ public Tuple2<CloseableIterable<QueryPlan>,Boolean> getQueryRanges(ScannerFactor
return new Tuple2<>(ranges, needsFullTable);
}

protected ASTJexlScript timedSortQueryBeforeGlobalIndex(ShardQueryConfiguration config, MetadataHelper metadataHelper) throws DatawaveQueryException {
return visitorManager.timedVisit(config.getTimers(), "SortQueryBeforeGlobalIndex", () -> {
ivakegg marked this conversation as resolved.
Show resolved Hide resolved
Set<String> fields = QueryFieldsVisitor.parseQueryFields(config.getQueryTree(), getMetadataHelper());
if (!fields.isEmpty()) {
Set<String> datatypes = config.getDatatypeFilter();
Map<String,Long> counts = metadataHelper.getCountsForFieldsInDateRange(fields, datatypes, config.getBeginDate(), config.getEndDate());
if (!counts.isEmpty()) {
return OrderByCostVisitor.orderByFieldCount(config.getQueryTree(), counts);
}
}
return config.getQueryTree();
});
}

private TypeMetadata getTypeMetadata() {
try {
return metadataHelper.getTypeMetadata();
Expand Down
37 changes: 32 additions & 5 deletions warehouse/query-core/src/test/java/datawave/query/ShapesTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.user.SeekingFilter;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.commons.collections.iterators.IteratorChain;
import org.apache.commons.jexl3.parser.ASTJexlScript;
import org.apache.commons.jexl3.parser.ParseException;
import org.apache.log4j.Logger;
Expand All @@ -38,6 +40,8 @@

import com.google.common.collect.Sets;

import datawave.accumulo.inmemory.InMemoryAccumuloClient;
import datawave.accumulo.inmemory.InMemoryInstance;
import datawave.configuration.spring.SpringBean;
import datawave.core.query.configuration.GenericQueryConfiguration;
import datawave.helpers.PrintUtility;
Expand All @@ -59,7 +63,11 @@
/**
* A set of tests that emphasize the influence of datatypes on query planning and execution
* <p>
* Data is from {@link ShapesIngest} test set
* Data is from {@link ShapesIngest} test set.
* <p>
* <b>Note:</b> This test class does NOT make use of the {@link RebuildingScannerTestHelper}. That helper class makes use of Apache Commons'
* {@link IteratorChain} in a way that is incompatible with Accumulo's {@link SeekingFilter}. Namely, during a rebuild on a next call the ScannerHelper's call
* to 'IteratorChain.next' will swap in a whole new seeking filter in a way that causes the call to 'range.clip' on SeekingFilter#222 to return null.
*/
public abstract class ShapesTest {

Expand Down Expand Up @@ -99,8 +107,8 @@ public static class ShardRange extends ShapesTest {

@BeforeClass
public static void setUp() throws Exception {
QueryTestTableHelper testHelper = new QueryTestTableHelper(ShardRange.class.toString(), log);
client = testHelper.client;
InMemoryInstance i = new InMemoryInstance(ShardRange.class.getName());
client = new InMemoryAccumuloClient("", i);

ShapesIngest.writeData(client, ShapesIngest.RangeType.SHARD);

Expand All @@ -122,8 +130,8 @@ public static class DocumentRange extends ShapesTest {

@BeforeClass
public static void setUp() throws Exception {
QueryTestTableHelper testHelper = new QueryTestTableHelper(DocumentRange.class.toString(), log);
client = testHelper.client;
InMemoryInstance i = new InMemoryInstance(DocumentRange.class.getName());
client = new InMemoryAccumuloClient("", i);

ShapesIngest.writeData(client, ShapesIngest.RangeType.DOCUMENT);

Expand Down Expand Up @@ -861,4 +869,23 @@ public void testPermutations() throws Exception {
}
}

/**
 * Verifies that enabling the sort-before-global-index option reorders the planned query: the TYPE term is expected ahead of the SHAPE term in the
 * planned query. The option is always switched back off afterwards.
 */
@Test
public void testSortQueryBeforeGlobalIndex() throws Exception {
    try {
        // for the triangle and pentagon datatypes:
        // SHAPE cardinality is 23
        // TYPE cardinality is 21
        withQuery("SHAPE == 'triangle' || TYPE == 'pentagon'");
        withParameter(QueryParameters.DATATYPE_FILTER_SET, "triangle,pentagon");
        withExpected(new HashSet<>(triangleUids));

        logic.setSortQueryBeforeGlobalIndex(true);
        planAndExecuteQuery();

        // the term order is swapped relative to the original query
        assertPlannedQuery("TYPE == 'pentagon' || SHAPE == 'triangle'");
    } finally {
        // restore the default regardless of the outcome
        logic.setSortQueryBeforeGlobalIndex(false);
    }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,12 @@ public void testValueExceededMarker() throws ParseException {
test(query, Collections.singleton("FOO"));
}

/**
 * A query consisting only of a method call is expected to produce no query fields.
 */
@Test
public void testMethod() throws ParseException {
    Set<String> noFields = Collections.emptySet();
    test("QUOTE.size() == 1", noFields);
}

private void test(String query, Set<String> fields) throws ParseException {

// query as string entrance point
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package datawave.query.util;

import static datawave.util.TableName.METADATA;
import static datawave.util.TableName.SHARD;
import static datawave.util.TableName.SHARD_INDEX;
import static datawave.util.TableName.SHARD_RINDEX;

import java.util.Date;
import java.util.List;
Expand All @@ -10,8 +12,10 @@
import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.BatchWriterConfig;
import org.apache.accumulo.core.client.admin.TableOperations;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.LongCombiner;
import org.apache.accumulo.core.security.ColumnVisibility;
import org.apache.hadoop.io.Text;

Expand Down Expand Up @@ -99,6 +103,8 @@ public enum RangeType {
private static final NumberType number = new NumberType();
private static final LcNoDiacriticsListType list = new LcNoDiacriticsListType();

private static final LongCombiner.VarLenEncoder encoder = new LongCombiner.VarLenEncoder();

protected static String normalizerForField(String field) {
switch (field) {
case "SHAPE":
Expand All @@ -122,6 +128,12 @@ protected static String normalizerForField(String field) {

public static void writeData(AccumuloClient client, RangeType type) throws Exception {

TableOperations tops = client.tableOperations();
tops.create(SHARD);
tops.create(SHARD_INDEX);
tops.create(SHARD_RINDEX);
tops.create(METADATA);

BatchWriterConfig bwConfig = new BatchWriterConfig().setMaxMemory(1000L).setMaxLatency(1, TimeUnit.SECONDS).setMaxWriteThreads(1);
Mutation m;

Expand Down Expand Up @@ -485,11 +497,11 @@ public static void writeData(AccumuloClient client, RangeType type) throws Excep
m.put(ColumnFamilyConstants.COLF_E, new Text(hexagon), value);
m.put(ColumnFamilyConstants.COLF_E, new Text(octagon), value);

m.put(ColumnFamilyConstants.COLF_F, new Text(triangle), value);
m.put(ColumnFamilyConstants.COLF_F, new Text(quadrilateral), value);
m.put(ColumnFamilyConstants.COLF_F, new Text(pentagon), value);
m.put(ColumnFamilyConstants.COLF_F, new Text(hexagon), value);
m.put(ColumnFamilyConstants.COLF_F, new Text(octagon), value);
m.put(ColumnFamilyConstants.COLF_F, new Text(triangle + '\u0000' + shard), createValue(12L));
m.put(ColumnFamilyConstants.COLF_F, new Text(quadrilateral + '\u0000' + shard), createValue(13L));
m.put(ColumnFamilyConstants.COLF_F, new Text(pentagon + '\u0000' + shard), createValue(11L));
m.put(ColumnFamilyConstants.COLF_F, new Text(hexagon + '\u0000' + shard), createValue(10L));
m.put(ColumnFamilyConstants.COLF_F, new Text(octagon + '\u0000' + shard), createValue(14L));

m.put(ColumnFamilyConstants.COLF_I, new Text(triangle), value);
m.put(ColumnFamilyConstants.COLF_I, new Text(quadrilateral), value);
Expand Down Expand Up @@ -518,11 +530,11 @@ public static void writeData(AccumuloClient client, RangeType type) throws Excep
m.put(ColumnFamilyConstants.COLF_E, new Text(hexagon), value);
m.put(ColumnFamilyConstants.COLF_E, new Text(octagon), value);

m.put(ColumnFamilyConstants.COLF_F, new Text(triangle), value);
m.put(ColumnFamilyConstants.COLF_F, new Text(quadrilateral), value);
m.put(ColumnFamilyConstants.COLF_F, new Text(pentagon), value);
m.put(ColumnFamilyConstants.COLF_F, new Text(hexagon), value);
m.put(ColumnFamilyConstants.COLF_F, new Text(octagon), value);
m.put(ColumnFamilyConstants.COLF_F, new Text(triangle + '\u0000' + shard), createValue(10L));
m.put(ColumnFamilyConstants.COLF_F, new Text(quadrilateral + '\u0000' + shard), createValue(14L));
m.put(ColumnFamilyConstants.COLF_F, new Text(pentagon + '\u0000' + shard), createValue(11L));
m.put(ColumnFamilyConstants.COLF_F, new Text(hexagon + '\u0000' + shard), createValue(13L));
m.put(ColumnFamilyConstants.COLF_F, new Text(octagon + '\u0000' + shard), createValue(12L));

m.put(ColumnFamilyConstants.COLF_I, new Text(triangle), value);
m.put(ColumnFamilyConstants.COLF_I, new Text(quadrilateral), value);
Expand Down Expand Up @@ -640,4 +652,8 @@ private static Value getValue(RangeType type, String uid) {
}
return new Value(builder.build().toByteArray());
}

/**
 * Wraps a count in a {@link Value} using the shared variable-length long encoder.
 *
 * @param count
 *            the count to encode
 * @return a value holding the encoded count
 */
private static Value createValue(long count) {
    byte[] encodedCount = encoder.encode(count);
    return new Value(encodedCount);
}
}
Loading