From 0066649f77c6f15b6deef0061cd677424dbba97b Mon Sep 17 00:00:00 2001 From: Moriarty <22225248+apmoriarty@users.noreply.github.com> Date: Thu, 20 Jun 2024 13:46:18 +0000 Subject: [PATCH 1/3] Update the sortQueryBeforeGlobalIndex option to get field cardinality from the DatawaveMetadata table --- pom.xml | 4 +-- .../jexl/visitors/QueryFieldsVisitor.java | 4 ++- .../query/planner/DefaultQueryPlanner.java | 17 +++++++++++- .../jexl/visitors/QueryFieldsVisitorTest.java | 6 +++++ .../datawave/query/util/ShapesIngest.java | 27 ++++++++++++------- 5 files changed, 44 insertions(+), 14 deletions(-) diff --git a/pom.xml b/pom.xml index a7206d867a..3b103d7724 100644 --- a/pom.xml +++ b/pom.xml @@ -74,7 +74,7 @@ 1.3 4.5.13 4.4.8 - 4.0.0 + 4.0.1 9.4.21.Final 2.10.0.pr1 1.9.13 @@ -107,7 +107,7 @@ 3.0.0 4.0.0 1.0.0 - 4.0.0 + 4.0.2 3.0.0 1.0.0 4.0.0 diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/QueryFieldsVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/QueryFieldsVisitor.java index 3f6cfcc770..480add9a6c 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/QueryFieldsVisitor.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/QueryFieldsVisitor.java @@ -87,7 +87,9 @@ public QueryFieldsVisitor(MetadataHelper helper) { private Object parseSingleField(JexlNode node, Object data) { String field = JexlASTHelper.getIdentifier(node); - ((Set) data).add(field); + if (field != null) { + ((Set) data).add(field); + } return data; } diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java index 3c7fd8ce57..c779090c48 100644 --- a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java +++ b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java @@ -134,6 +134,7 @@ import 
datawave.query.jexl.visitors.PushdownLowSelectivityNodesVisitor; import datawave.query.jexl.visitors.PushdownMissingIndexRangeNodesVisitor; import datawave.query.jexl.visitors.PushdownUnexecutableNodesVisitor; +import datawave.query.jexl.visitors.QueryFieldsVisitor; import datawave.query.jexl.visitors.QueryModelVisitor; import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor; import datawave.query.jexl.visitors.QueryPropertyMarkerSourceConsolidator; @@ -2709,7 +2710,7 @@ public Tuple2,Boolean> getQueryRanges(ScannerFactor } if (config.isSortQueryBeforeGlobalIndex()) { - queryTree = OrderByCostVisitor.order((ASTJexlScript) queryTree); + config.setQueryTree(timedSortQueryBeforeGlobalIndex(config, getMetadataHelper())); } // if a simple examination of the query has not forced a full table @@ -2796,6 +2797,20 @@ public Tuple2,Boolean> getQueryRanges(ScannerFactor return new Tuple2<>(ranges, needsFullTable); } + protected ASTJexlScript timedSortQueryBeforeGlobalIndex(ShardQueryConfiguration config, MetadataHelper metadataHelper) throws DatawaveQueryException { + return visitorManager.timedVisit(config.getTimers(), "SortQueryBeforeGlobalIndex", () -> { + Set fields = QueryFieldsVisitor.parseQueryFields(config.getQueryTree(), getMetadataHelper()); + if (!fields.isEmpty()) { + Set datatypes = config.getDatatypeFilter(); + Map counts = metadataHelper.getCountsForFieldsInDateRange(fields, datatypes, config.getBeginDate(), config.getEndDate()); + if (!counts.isEmpty()) { + return OrderByCostVisitor.orderByFieldCount(config.getQueryTree(), counts); + } + } + return config.getQueryTree(); + }); + } + private TypeMetadata getTypeMetadata() { try { return metadataHelper.getTypeMetadata(); diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/QueryFieldsVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/QueryFieldsVisitorTest.java index de22998bbb..df8eedd612 100644 --- 
a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/QueryFieldsVisitorTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/QueryFieldsVisitorTest.java @@ -194,6 +194,12 @@ public void testValueExceededMarker() throws ParseException { test(query, Collections.singleton("FOO")); } + @Test + public void testMethod() throws ParseException { + String query = "QUOTE.size() == 1"; + test(query, Collections.emptySet()); + } + private void test(String query, Set fields) throws ParseException { // query as string entrance point diff --git a/warehouse/query-core/src/test/java/datawave/query/util/ShapesIngest.java b/warehouse/query-core/src/test/java/datawave/query/util/ShapesIngest.java index 7a6449632e..31d2ad9bdf 100644 --- a/warehouse/query-core/src/test/java/datawave/query/util/ShapesIngest.java +++ b/warehouse/query-core/src/test/java/datawave/query/util/ShapesIngest.java @@ -12,6 +12,7 @@ import org.apache.accumulo.core.client.BatchWriterConfig; import org.apache.accumulo.core.data.Mutation; import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.iterators.LongCombiner; import org.apache.accumulo.core.security.ColumnVisibility; import org.apache.hadoop.io.Text; @@ -99,6 +100,8 @@ public enum RangeType { private static final NumberType number = new NumberType(); private static final LcNoDiacriticsListType list = new LcNoDiacriticsListType(); + private static final LongCombiner.VarLenEncoder encoder = new LongCombiner.VarLenEncoder(); + protected static String normalizerForField(String field) { switch (field) { case "SHAPE": @@ -485,11 +488,11 @@ public static void writeData(AccumuloClient client, RangeType type) throws Excep m.put(ColumnFamilyConstants.COLF_E, new Text(hexagon), value); m.put(ColumnFamilyConstants.COLF_E, new Text(octagon), value); - m.put(ColumnFamilyConstants.COLF_F, new Text(triangle), value); - m.put(ColumnFamilyConstants.COLF_F, new Text(quadrilateral), value); - 
m.put(ColumnFamilyConstants.COLF_F, new Text(pentagon), value); - m.put(ColumnFamilyConstants.COLF_F, new Text(hexagon), value); - m.put(ColumnFamilyConstants.COLF_F, new Text(octagon), value); + m.put(ColumnFamilyConstants.COLF_F, new Text(triangle + '\u0000' + shard), createValue(12L)); + m.put(ColumnFamilyConstants.COLF_F, new Text(quadrilateral + '\u0000' + shard), createValue(13L)); + m.put(ColumnFamilyConstants.COLF_F, new Text(pentagon + '\u0000' + shard), createValue(11L)); + m.put(ColumnFamilyConstants.COLF_F, new Text(hexagon + '\u0000' + shard), createValue(10L)); + m.put(ColumnFamilyConstants.COLF_F, new Text(octagon + '\u0000' + shard), createValue(14L)); m.put(ColumnFamilyConstants.COLF_I, new Text(triangle), value); m.put(ColumnFamilyConstants.COLF_I, new Text(quadrilateral), value); @@ -518,11 +521,11 @@ public static void writeData(AccumuloClient client, RangeType type) throws Excep m.put(ColumnFamilyConstants.COLF_E, new Text(hexagon), value); m.put(ColumnFamilyConstants.COLF_E, new Text(octagon), value); - m.put(ColumnFamilyConstants.COLF_F, new Text(triangle), value); - m.put(ColumnFamilyConstants.COLF_F, new Text(quadrilateral), value); - m.put(ColumnFamilyConstants.COLF_F, new Text(pentagon), value); - m.put(ColumnFamilyConstants.COLF_F, new Text(hexagon), value); - m.put(ColumnFamilyConstants.COLF_F, new Text(octagon), value); + m.put(ColumnFamilyConstants.COLF_F, new Text(triangle + '\u0000' + shard), createValue(10L)); + m.put(ColumnFamilyConstants.COLF_F, new Text(quadrilateral + '\u0000' + shard), createValue(14L)); + m.put(ColumnFamilyConstants.COLF_F, new Text(pentagon + '\u0000' + shard), createValue(11L)); + m.put(ColumnFamilyConstants.COLF_F, new Text(hexagon + '\u0000' + shard), createValue(13L)); + m.put(ColumnFamilyConstants.COLF_F, new Text(octagon + '\u0000' + shard), createValue(12L)); m.put(ColumnFamilyConstants.COLF_I, new Text(triangle), value); m.put(ColumnFamilyConstants.COLF_I, new Text(quadrilateral), value); @@ -640,4 
+643,8 @@ private static Value getValue(RangeType type, String uid) { } return new Value(builder.build().toByteArray()); } + + private static Value createValue(long count) { + return new Value(encoder.encode(count)); + } } From 4d1e26f26e6a501aaca31248867736422200d470 Mon Sep 17 00:00:00 2001 From: Moriarty <22225248+apmoriarty@users.noreply.github.com> Date: Thu, 27 Jun 2024 10:55:05 +0000 Subject: [PATCH 2/3] Increment metadata-utils version to 4.0.3 to pick up bugfix --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 3b103d7724..07490ae4ad 100644 --- a/pom.xml +++ b/pom.xml @@ -107,7 +107,7 @@ 3.0.0 4.0.0 1.0.0 - 4.0.2 + 4.0.3 3.0.0 1.0.0 4.0.0 From 31316180e4de706e44437f7674a579f00dff4b9c Mon Sep 17 00:00:00 2001 From: Moriarty <22225248+apmoriarty@users.noreply.github.com> Date: Mon, 1 Jul 2024 14:46:46 +0000 Subject: [PATCH 3/3] Add test to validate query ordering based on field cardinality, update test framework to not use rebuilding scanner helper due to incompatible usages of interfaces --- .../test/java/datawave/query/ShapesTest.java | 37 ++++++++++++++++--- .../datawave/query/util/ShapesIngest.java | 9 +++++ 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/warehouse/query-core/src/test/java/datawave/query/ShapesTest.java b/warehouse/query-core/src/test/java/datawave/query/ShapesTest.java index f266d5eb7b..8149117800 100644 --- a/warehouse/query-core/src/test/java/datawave/query/ShapesTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/ShapesTest.java @@ -20,7 +20,9 @@ import org.apache.accumulo.core.client.AccumuloClient; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.iterators.user.SeekingFilter; import org.apache.accumulo.core.security.Authorizations; +import org.apache.commons.collections.iterators.IteratorChain; import org.apache.commons.jexl3.parser.ASTJexlScript; import 
org.apache.commons.jexl3.parser.ParseException; import org.apache.log4j.Logger; @@ -38,6 +40,8 @@ import com.google.common.collect.Sets; +import datawave.accumulo.inmemory.InMemoryAccumuloClient; +import datawave.accumulo.inmemory.InMemoryInstance; import datawave.configuration.spring.SpringBean; import datawave.core.query.configuration.GenericQueryConfiguration; import datawave.helpers.PrintUtility; @@ -59,7 +63,11 @@ /** * A set of tests that emphasize the influence of datatypes on query planning and execution *

- * Data is from {@link ShapesIngest} test set + * Data is from {@link ShapesIngest} test set. + *

+ * Note: This test class does NOT make use of the {@link RebuildingScannerTestHelper}. That helper class makes use of the Apache Commons' + * {@link IteratorChain} in a way that is incompatible with Accumulo's {@link SeekingFilter}. Namely, during a rebuild on a next call the ScannerHelper's call + * to 'IteratorChain.next' will swap in a whole new seeking filter in a way that causes the call to 'range.clip' on SeekingFilter#222 to return null. */ public abstract class ShapesTest { @@ -99,8 +107,8 @@ public static class ShardRange extends ShapesTest { @BeforeClass public static void setUp() throws Exception { - QueryTestTableHelper testHelper = new QueryTestTableHelper(ShardRange.class.toString(), log); - client = testHelper.client; + InMemoryInstance i = new InMemoryInstance(ShardRange.class.getName()); + client = new InMemoryAccumuloClient("", i); ShapesIngest.writeData(client, ShapesIngest.RangeType.SHARD); @@ -122,8 +130,8 @@ public static class DocumentRange extends ShapesTest { @BeforeClass public static void setUp() throws Exception { - QueryTestTableHelper testHelper = new QueryTestTableHelper(DocumentRange.class.toString(), log); - client = testHelper.client; + InMemoryInstance i = new InMemoryInstance(DocumentRange.class.getName()); + client = new InMemoryAccumuloClient("", i); ShapesIngest.writeData(client, ShapesIngest.RangeType.DOCUMENT); @@ -861,4 +869,23 @@ public void testPermutations() throws Exception { } } + @Test + public void testSortQueryBeforeGlobalIndex() throws Exception { + try { + // SHAPE cardinality for triangle and pentagon types is 23 + // TYPE cardinality for triangle and pentagon types is 21 + withQuery("SHAPE == 'triangle' || TYPE == 'pentagon'"); + withParameter(QueryParameters.DATATYPE_FILTER_SET, "triangle,pentagon"); + + Set expectedUids = new HashSet<>(triangleUids); + withExpected(expectedUids); + + logic.setSortQueryBeforeGlobalIndex(true); + planAndExecuteQuery(); + assertPlannedQuery("TYPE == 'pentagon' || SHAPE == 
'triangle'"); + } finally { + logic.setSortQueryBeforeGlobalIndex(false); + } + } + } diff --git a/warehouse/query-core/src/test/java/datawave/query/util/ShapesIngest.java b/warehouse/query-core/src/test/java/datawave/query/util/ShapesIngest.java index 31d2ad9bdf..ab2377ec17 100644 --- a/warehouse/query-core/src/test/java/datawave/query/util/ShapesIngest.java +++ b/warehouse/query-core/src/test/java/datawave/query/util/ShapesIngest.java @@ -1,7 +1,9 @@ package datawave.query.util; +import static datawave.util.TableName.METADATA; import static datawave.util.TableName.SHARD; import static datawave.util.TableName.SHARD_INDEX; +import static datawave.util.TableName.SHARD_RINDEX; import java.util.Date; import java.util.List; @@ -10,6 +12,7 @@ import org.apache.accumulo.core.client.AccumuloClient; import org.apache.accumulo.core.client.BatchWriter; import org.apache.accumulo.core.client.BatchWriterConfig; +import org.apache.accumulo.core.client.admin.TableOperations; import org.apache.accumulo.core.data.Mutation; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.iterators.LongCombiner; @@ -125,6 +128,12 @@ protected static String normalizerForField(String field) { public static void writeData(AccumuloClient client, RangeType type) throws Exception { + TableOperations tops = client.tableOperations(); + tops.create(SHARD); + tops.create(SHARD_INDEX); + tops.create(SHARD_RINDEX); + tops.create(METADATA); + BatchWriterConfig bwConfig = new BatchWriterConfig().setMaxMemory(1000L).setMaxLatency(1, TimeUnit.SECONDS).setMaxWriteThreads(1); Mutation m;