> getFieldIndexHoles(Text targetCol
throws TableNotFoundException, IOException {
// create local copies to avoid side effects
fields = new HashSet<>(fields);
- datatypes = new HashSet<>(datatypes);
+ datatypes = datatypes == null ? null : new HashSet<>(datatypes);
// Handle null fields if given.
if (fields == null) {
diff --git a/src/main/java/datawave/query/util/MetadataHelper.java b/src/main/java/datawave/query/util/MetadataHelper.java
index 830adc5..3b8cd24 100644
--- a/src/main/java/datawave/query/util/MetadataHelper.java
+++ b/src/main/java/datawave/query/util/MetadataHelper.java
@@ -91,15 +91,14 @@
*
* Helper class to fetch the set of field names which are only indexed, i.e. do not occur as attributes in the event.
*
- *
+ *
*
* This set would normally includes all tokenized content fields. In terms of keys in the DatawaveMetadata table, this set would contain all rows in the
* {@code DatawaveMetadata} table which have a {@link ColumnFamilyConstants#COLF_I} but not a {@link ColumnFamilyConstants#COLF_E}
*
- *
- *
+ *
+ *
* TODO -- Break this class apart
- *
*/
@EnableCaching
@Component("metadataHelper")
@@ -170,7 +169,7 @@ public MetadataHelper(AllFieldMetadataHelper allFieldMetadataHelper, Collection<
* allMetadataAuths is a singleton Collection of one Authorizations instance that contains all the auths required to see everything in the Metadata table.
*
* This is effectively a userAuths.containsAll(metadataAuths) call.
- *
+ *
* @param usersAuthsCollection
* the user authorizations
* @param allMetadataAuthsCollection
@@ -360,13 +359,7 @@ public Set getAllFields(Set ingestTypeFilter) throws TableNotFou
}
Set fields = new HashSet<>();
- if (ingestTypeFilter == null || ingestTypeFilter.isEmpty()) {
- fields.addAll(allFields.values());
- } else {
- for (String datatype : ingestTypeFilter) {
- fields.addAll(allFields.get(datatype));
- }
- }
+ fields.addAll(getFields(allFields, ingestTypeFilter));
// Add any additional fields that are created at evaluation time and are hence not in the metadata table.
fields.addAll(evaluationOnlyFields);
@@ -383,7 +376,7 @@ public Set getEvaluationOnlyFields() {
/**
* Set the evaluation only fields
- *
+ *
* @param evaluationOnlyFields
* a collection of evaluation only fields
*/
@@ -394,7 +387,7 @@ public void setEvaluationOnlyFields(Set evaluationOnlyFields) {
/**
* Get the fields that have values not in the same form as the event (excluding normalization). This would include index only fields, term frequency fields
* (as the index may contain tokens), and composite fields.
- *
+ *
* @param ingestTypeFilter
* set of ingest types used to restrict the scan
* @return the non-event fields
@@ -445,20 +438,12 @@ public Set getIndexOnlyFields(Set ingestTypeFilter) throws Table
Multimap indexOnlyFields = this.allFieldMetadataHelper.getIndexOnlyFields();
- Set fields = new HashSet<>();
- if (ingestTypeFilter == null || ingestTypeFilter.isEmpty()) {
- fields.addAll(indexOnlyFields.values());
- } else {
- for (String datatype : ingestTypeFilter) {
- fields.addAll(indexOnlyFields.get(datatype));
- }
- }
- return Collections.unmodifiableSet(fields);
+ return getFields(indexOnlyFields, ingestTypeFilter);
}
/**
* Get a QueryModel from the specified table
- *
+ *
* @param modelTableName
* the query model table
* @param modelName
@@ -604,7 +589,7 @@ public Set getQueryModelNames(String modelTableName) throws TableNotFoun
/**
* Determines whether a field has been reverse indexed by looking for the ri column in the metadata table
- *
+ *
* @param fieldName
* the field
* @param ingestTypeFilter
@@ -615,7 +600,6 @@ public Set getQueryModelNames(String modelTableName) throws TableNotFoun
*/
public boolean isReverseIndexed(String fieldName, Set ingestTypeFilter) throws TableNotFoundException {
Preconditions.checkNotNull(fieldName);
- Preconditions.checkNotNull(ingestTypeFilter);
Entry>> entry = Maps.immutableEntry(metadataTableName, Maps.immutableEntry(fieldName, ingestTypeFilter));
@@ -628,7 +612,7 @@ public boolean isReverseIndexed(String fieldName, Set ingestTypeFilter)
/**
* Determines whether a field has been indexed by looking for the i column in the metadata table
- *
+ *
* @param fieldName
* the field
* @param ingestTypeFilter
@@ -639,7 +623,6 @@ public boolean isReverseIndexed(String fieldName, Set ingestTypeFilter)
*/
public boolean isIndexed(String fieldName, Set ingestTypeFilter) throws TableNotFoundException {
Preconditions.checkNotNull(fieldName);
- Preconditions.checkNotNull(ingestTypeFilter);
Entry>> entry = Maps.immutableEntry(metadataTableName, Maps.immutableEntry(fieldName, ingestTypeFilter));
@@ -653,7 +636,7 @@ public boolean isIndexed(String fieldName, Set ingestTypeFilter) throws
/**
* Determines whether a field has been tokenized by looking for the tf column in the metadata table
- *
+ *
* @param fieldName
* the field name
* @param ingestTypeFilter
@@ -664,7 +647,6 @@ public boolean isIndexed(String fieldName, Set ingestTypeFilter) throws
*/
public boolean isTokenized(String fieldName, Set ingestTypeFilter) throws TableNotFoundException {
Preconditions.checkNotNull(fieldName);
- Preconditions.checkNotNull(ingestTypeFilter);
Entry>> entry = Maps.immutableEntry(metadataTableName, Maps.immutableEntry(fieldName, ingestTypeFilter));
@@ -800,7 +782,7 @@ public Map> getTermCountsWithRootAu
/**
* Returns a Set of all TextNormalizers in use by any type in Accumulo
- *
+ *
* @return a set of all normalizers
* @throws InstantiationException
* it can't, remove this
@@ -854,7 +836,7 @@ public Set> getAllDatatypes() throws InstantiationException, IllegalAcce
/**
* A map of composite name to the ordered list of it for example, mapping of {@code COLOR -> ['COLOR_WHEELS,0', 'MAKE_COLOR,1' ]}. If called multiple time,
* it returns the same cached map.
- *
+ *
* @return An unmodifiable Multimap of composite fields
* @throws TableNotFoundException
* if no table exists
@@ -1107,15 +1089,7 @@ public Set getTermFrequencyFields(Set ingestTypeFilter) throws T
Multimap termFrequencyFields = loadTermFrequencyFields();
- Set fields = new HashSet<>();
- if (ingestTypeFilter == null || ingestTypeFilter.isEmpty()) {
- fields.addAll(termFrequencyFields.values());
- } else {
- for (String datatype : ingestTypeFilter) {
- fields.addAll(termFrequencyFields.get(datatype));
- }
- }
- return Collections.unmodifiableSet(fields);
+ return getFields(termFrequencyFields, ingestTypeFilter);
}
/**
@@ -1131,15 +1105,7 @@ public Set getIndexedFields(Set ingestTypeFilter) throws TableNo
Multimap indexedFields = this.allFieldMetadataHelper.loadIndexedFields();
- Set fields = new HashSet<>();
- if (ingestTypeFilter == null || ingestTypeFilter.isEmpty()) {
- fields.addAll(indexedFields.values());
- } else {
- for (String datatype : ingestTypeFilter) {
- fields.addAll(indexedFields.get(datatype));
- }
- }
- return Collections.unmodifiableSet(fields);
+ return getFields(indexedFields, ingestTypeFilter);
}
/**
@@ -1153,22 +1119,14 @@ public Set getIndexedFields(Set ingestTypeFilter) throws TableNo
*/
public Set getReverseIndexedFields(Set ingestTypeFilter) throws TableNotFoundException {
- Multimap indexedFields = this.allFieldMetadataHelper.loadReverseIndexedFields();
+ Multimap reverseIndexedFields = this.allFieldMetadataHelper.loadReverseIndexedFields();
- Set fields = new HashSet<>();
- if (ingestTypeFilter == null || ingestTypeFilter.isEmpty()) {
- fields.addAll(indexedFields.values());
- } else {
- for (String datatype : ingestTypeFilter) {
- fields.addAll(indexedFields.get(datatype));
- }
- }
- return Collections.unmodifiableSet(fields);
+ return getFields(reverseIndexedFields, ingestTypeFilter);
}
/**
* Get expansion fields using the data type filter.
- *
+ *
* @param ingestTypeFilter
* the ingest type filter
* @return the set of expansion fields that match the provided ingest type filter
@@ -1179,20 +1137,12 @@ public Set getExpansionFields(Set ingestTypeFilter) throws Table
Multimap expansionFields = this.allFieldMetadataHelper.loadExpansionFields();
- Set fields = new HashSet<>();
- if (ingestTypeFilter == null || ingestTypeFilter.isEmpty()) {
- fields.addAll(expansionFields.values());
- } else {
- for (String datatype : ingestTypeFilter) {
- fields.addAll(expansionFields.get(datatype));
- }
- }
- return Collections.unmodifiableSet(fields);
+ return getFields(expansionFields, ingestTypeFilter);
}
/**
* Get the content fields which are those to be queried when using the content functions.
- *
+ *
* @param ingestTypeFilter
* the ingest type filter
* @return the fields used for content functions given the ingest type filter
@@ -1203,15 +1153,19 @@ public Set getContentFields(Set ingestTypeFilter) throws TableNo
Multimap contentFields = this.allFieldMetadataHelper.loadContentFields();
- Set fields = new HashSet<>();
- if (ingestTypeFilter == null || ingestTypeFilter.isEmpty()) {
- fields.addAll(contentFields.values());
- } else {
+ return getFields(contentFields, ingestTypeFilter);
+ }
+
+ private Set getFields(Multimap fields, Set ingestTypeFilter) {
+ Set returnedFields = new HashSet<>();
+ if (ingestTypeFilter == null) {
+ returnedFields.addAll(fields.values());
+ } else if (!ingestTypeFilter.isEmpty()) {
for (String datatype : ingestTypeFilter) {
- fields.addAll(contentFields.get(datatype));
- }
+ returnedFields.addAll(fields.get(datatype));
+ } // a non-null but empty ingestTypeFilter matches no datatypes, so no fields are returned
}
- return Collections.unmodifiableSet(fields);
+ return Collections.unmodifiableSet(returnedFields);
}
/**
@@ -1327,7 +1281,7 @@ public Set getDatatypes(Set ingestTypeFilter) throws TableNotFou
/**
* Returns the sum of counts for the given field across all datatypes in the date range
- *
+ *
* @param fieldName
* the field
* @param begin
@@ -1342,7 +1296,7 @@ public Long getCountsByFieldForDays(String fieldName, Date begin, Date end) {
/**
* Returns the sum of all counts for the given fields and datatypes from the start date to the end date.
- *
+ *
* @param fieldName
* the field name to filter on
* @param begin
@@ -1358,7 +1312,6 @@ public Long getCountsByFieldForDays(String fieldName, Date begin, Date end, Set<
Preconditions.checkNotNull(begin);
Preconditions.checkNotNull(end);
Preconditions.checkArgument((begin.before(end) || begin.getTime() == end.getTime()));
- Preconditions.checkNotNull(dataTypes);
Date truncatedBegin = DateUtils.truncate(begin, Calendar.DATE);
Date truncatedEnd = DateUtils.truncate(end, Calendar.DATE);
@@ -1417,11 +1370,11 @@ public Long getCountsByFieldInDay(String fieldName, String date) {
public Long getCountsByFieldInDayWithTypes(String fieldName, String date, final Set datatypes) {
Preconditions.checkNotNull(fieldName);
Preconditions.checkNotNull(date);
- Preconditions.checkNotNull(datatypes);
try {
Map countsByType = getCountsByFieldInDayWithTypes(Maps.immutableEntry(fieldName, date));
- Iterable> filteredByType = Iterables.filter(countsByType.entrySet(), input -> datatypes.contains(input.getKey()));
+ Iterable> filteredByType = datatypes == null ? countsByType.entrySet()
+ : Iterables.filter(countsByType.entrySet(), input -> datatypes.contains(input.getKey()));
long sum = 0;
for (Entry entry : filteredByType) {
@@ -1577,7 +1530,7 @@ public Map getCountsForFieldsInDateRange(Set fields, Set getCountsForFieldsInDateRange(Set fields, Set getCountsForFieldsInDateRange(Set fields, Set datatypes, String beginDate, String endDate) {
- SortedSet sortedDatatypes = new TreeSet<>(datatypes);
+ SortedSet sortedDatatypes = datatypes == null ? new TreeSet<>() : new TreeSet<>(datatypes);
Map fieldCounts = new HashMap<>();
Set ranges = createFieldCountRanges(fields, sortedDatatypes, beginDate, endDate);
@@ -1668,7 +1621,7 @@ private Set createFieldCountRanges(Set fields, SortedSet
/**
* Deserialize a Value that contains a Long
- *
+ *
* @param value
* an accumulo Value
* @return a long
@@ -1685,7 +1638,7 @@ private Long readLongFromValue(Value value) throws IOException {
/**
* Get the earliest occurrence of a field across all datatypes
- *
+ *
* @param fieldName
* the field
* @return the earliest date that this field occurs
@@ -1696,7 +1649,7 @@ public Date getEarliestOccurrenceOfField(String fieldName) {
/**
* Get the earliest occurrence of a field for the given datatype
- *
+ *
* @param fieldName
* the field
* @param dataType
@@ -1718,7 +1671,7 @@ public Date getEarliestOccurrenceOfFieldWithType(String fieldName, final String
/**
* Get the earliest occurrence of a field given a datatype
- *
+ *
* @param fieldName
* the field
* @param dataType
@@ -1791,7 +1744,7 @@ protected Date getEarliestOccurrenceOfFieldWithType(String fieldName, final Stri
/**
* Return the field index holes calculated between all "i" and "f" entries. The map consists of field names to datatypes to field index holes.
- *
+ *
* @param fields
* the fields to fetch field index holes for, an empty set will result in all fields being fetched
* @param datatypes
@@ -1808,7 +1761,7 @@ public Map> getFieldIndexHoles(Set fie
/**
* Return the field index holes calculated between all "ri" and "f" entries. The map consists of field names to datatypes to field index holes.
- *
+ *
* @param fields
* the fields to fetch field index holes for, an empty set will result in all fields being fetched
* @param datatypes
@@ -1941,7 +1894,7 @@ protected Multimap loadAllFields() throws TableNotFoundException
/**
* Fetches results from metadata table and calculates the set of fieldNames which are indexed but do not appear as an attribute on the Event Returns a
* multimap of datatype to field
- *
+ *
* @throws TableNotFoundException
* if no table exists
*/
diff --git a/src/test/java/datawave/query/util/MetadataHelperTableTest.java b/src/test/java/datawave/query/util/MetadataHelperTableTest.java
index 20f9290..2b2cd16 100644
--- a/src/test/java/datawave/query/util/MetadataHelperTableTest.java
+++ b/src/test/java/datawave/query/util/MetadataHelperTableTest.java
@@ -297,7 +297,7 @@ public void testGetMetadataWithDatatypeFilter() throws Exception {
@Test
public void testGetAllFields() throws Exception {
Set expected = Set.of("SHAPE", "COLOR", "DEFINITION", "EVENT_ONLY");
- assertEquals(expected, helper.getAllFields(Collections.emptySet()));
+ assertEquals(expected, helper.getAllFields(null));
// and with filter
Set filter = Collections.singleton("datatype-a");
@@ -314,7 +314,8 @@ public void testGetEvaluationOnlyFields() {
@Test
public void testGetNonEventFields() throws Exception {
Set expected = Set.of("DEFINITION");
- assertEquals(expected, helper.getNonEventFields(Collections.emptySet()));
+ assertEquals(expected, helper.getNonEventFields(null));
+ assertEquals(Collections.emptySet(), helper.getNonEventFields(Collections.emptySet()));
// then restrict the filter
Set filter = Collections.singleton("datatype-a");
@@ -340,7 +341,8 @@ public void testIsOverloadedCompositeField() {
@Test
public void testGetIndexOnlyFields() throws Exception {
Set expected = Set.of("DEFINITION");
- assertEquals(expected, helper.getIndexOnlyFields(Collections.emptySet()));
+ assertEquals(expected, helper.getIndexOnlyFields(null));
+ assertEquals(Collections.emptySet(), helper.getIndexOnlyFields(Collections.emptySet()));
// restrict filter
Set filter = Collections.singleton("datatype-b");
@@ -569,7 +571,7 @@ public void testGetDatatypeFromClass() throws Exception {
public void testGetTermFrequencyFields() throws Exception {
Set tokenizedFields = Set.of("DEFINITION");
assertEquals(tokenizedFields, helper.getTermFrequencyFields(null));
- assertEquals(tokenizedFields, helper.getTermFrequencyFields(Collections.emptySet()));
+ assertEquals(Collections.emptySet(), helper.getTermFrequencyFields(Collections.emptySet()));
assertEquals(Collections.emptySet(), helper.getTermFrequencyFields(Set.of("datatype-c")));
}
@@ -577,7 +579,7 @@ public void testGetTermFrequencyFields() throws Exception {
public void testGetIndexedFields() throws Exception {
Set indexedFields = Set.of("SHAPE", "COLOR", "DEFINITION");
assertEquals(indexedFields, helper.getIndexedFields(null));
- assertEquals(indexedFields, helper.getIndexedFields(Collections.emptySet()));
+ assertEquals(Collections.emptySet(), helper.getIndexedFields(Collections.emptySet()));
assertEquals(Set.of("SHAPE", "DEFINITION"), helper.getIndexedFields(Set.of("datatype-a")));
assertEquals(Set.of("COLOR", "DEFINITION"), helper.getIndexedFields(Set.of("datatype-b")));
assertEquals(Collections.emptySet(), helper.getIndexedFields(Set.of("datatype-c")));
@@ -587,7 +589,7 @@ public void testGetIndexedFields() throws Exception {
public void testGetReverseIndexedFields() throws Exception {
Set reverseIndexedFields = Set.of("SHAPE", "DEFINITION");
assertEquals(reverseIndexedFields, helper.getReverseIndexedFields(null));
- assertEquals(reverseIndexedFields, helper.getReverseIndexedFields(Collections.emptySet()));
+ assertEquals(Collections.emptySet(), helper.getReverseIndexedFields(Collections.emptySet()));
assertEquals(Set.of("SHAPE", "DEFINITION"), helper.getReverseIndexedFields(Set.of("datatype-a")));
assertEquals(Set.of("DEFINITION"), helper.getReverseIndexedFields(Set.of("datatype-b")));
assertEquals(Collections.emptySet(), helper.getReverseIndexedFields(Set.of("datatype-c")));
@@ -597,7 +599,7 @@ public void testGetReverseIndexedFields() throws Exception {
public void testGetExpansionFields() throws Exception {
Set expansionFields = Set.of("EXP_1", "EXP_2");
assertEquals(expansionFields, helper.getExpansionFields(null));
- assertEquals(expansionFields, helper.getExpansionFields(Collections.emptySet()));
+ assertEquals(Collections.emptySet(), helper.getExpansionFields(Collections.emptySet()));
assertEquals(Set.of("EXP_1"), helper.getExpansionFields(Set.of("datatype-a")));
assertEquals(Set.of("EXP_2"), helper.getExpansionFields(Set.of("datatype-b")));
assertEquals(Collections.emptySet(), helper.getExpansionFields(Set.of("datatype-c")));
@@ -607,7 +609,7 @@ public void testGetExpansionFields() throws Exception {
public void testGetContentFields() throws Exception {
Set contentFields = Set.of("DEFINITION");
assertEquals(contentFields, helper.getContentFields(null));
- assertEquals(contentFields, helper.getContentFields(Collections.emptySet()));
+ assertEquals(Collections.emptySet(), helper.getContentFields(Collections.emptySet()));
assertEquals(contentFields, helper.getContentFields(Set.of("datatype-a")));
assertEquals(Collections.emptySet(), helper.getContentFields(Set.of("datatype-b")));
}
@@ -678,13 +680,11 @@ public void testGetCountsByFieldForDays() {
@Test
public void testGetCountsByFieldForDaysWithIngestTypeFilter() {
// range of single day
- assertThrows(NullPointerException.class, () -> helper.getCountsByFieldForDays("SHAPE", getDate("20240301"), getDate("20240301"), null));
assertEquals(0L, helper.getCountsByFieldForDays("SHAPE", getDate("20240301"), getDate("20240301"), Collections.emptySet()));
assertEquals(23L, helper.getCountsByFieldForDays("SHAPE", getDate("20240301"), getDate("20240301"), Set.of("datatype-a")));
assertEquals(0L, helper.getCountsByFieldForDays("SHAPE", getDate("20240301"), getDate("20240301"), Set.of("datatype-b")));
// full range
- assertThrows(NullPointerException.class, () -> helper.getCountsByFieldForDays("SHAPE", getDate("20240301"), getDate("20240305"), null));
assertEquals(0L, helper.getCountsByFieldForDays("SHAPE", getDate("20240301"), getDate("20240305"), Collections.emptySet()));
// 559 is wrong. Full count is 658.
assertEquals(559L, helper.getCountsByFieldForDays("SHAPE", getDate("20240301"), getDate("20240305"), Set.of("datatype-a")));
@@ -699,7 +699,6 @@ public void testGetCountsByFieldInDay() {
@Test
public void testGetCountsByFieldInDayWithTypes() {
- assertThrows(NullPointerException.class, () -> helper.getCountsByFieldInDayWithTypes("SHAPE", "20240301", null));
assertEquals(0L, helper.getCountsByFieldInDayWithTypes("SHAPE", "20240301", Collections.emptySet()));
assertEquals(23L, helper.getCountsByFieldInDayWithTypes("SHAPE", "20240301", Set.of("datatype-a")));
assertEquals(0L, helper.getCountsByFieldInDayWithTypes("SHAPE", "20240315", Set.of("datatype-a")));
diff --git a/src/test/java/datawave/query/util/MetadataHelperTest.java b/src/test/java/datawave/query/util/MetadataHelperTest.java
index 5486880..b72d96f 100644
--- a/src/test/java/datawave/query/util/MetadataHelperTest.java
+++ b/src/test/java/datawave/query/util/MetadataHelperTest.java
@@ -73,7 +73,7 @@ public void testSingleFieldFilter() throws TableNotFoundException {
Assertions.assertEquals(Collections.singleton("rowA"), helper.getAllFields(Collections.singleton("dataTypeA")));
Assertions.assertEquals(Collections.singleton("rowA"), helper.getAllFields(null));
- Assertions.assertEquals(Collections.singleton("rowA"), helper.getAllFields(Collections.emptySet()));
+ Assertions.assertEquals(Collections.emptySet(), helper.getAllFields(Collections.emptySet()));
}
@Test
@@ -83,7 +83,7 @@ public void testMultipleFieldFilter() throws TableNotFoundException {
Assertions.assertEquals(Collections.singleton("rowB"), helper.getAllFields(Collections.singleton("dataTypeB")));
Assertions.assertEquals(Sets.newHashSet("rowA", "rowB"), helper.getAllFields(null));
- Assertions.assertEquals(Sets.newHashSet("rowA", "rowB"), helper.getAllFields(Collections.emptySet()));
+ Assertions.assertEquals(Collections.emptySet(), helper.getAllFields(Collections.emptySet()));
}
@Test
@@ -94,7 +94,7 @@ public void testMultipleFieldFilter2() throws TableNotFoundException {
Assertions.assertEquals(Collections.singleton("rowB"), helper.getAllFields(Collections.singleton("dataTypeB")));
Assertions.assertEquals(Sets.newHashSet("rowA", "rowB", "rowC"), helper.getAllFields(null));
- Assertions.assertEquals(Sets.newHashSet("rowA", "rowB", "rowC"), helper.getAllFields(Collections.emptySet()));
+ Assertions.assertEquals(Collections.emptySet(), helper.getAllFields(Collections.emptySet()));
}
private void writeMutation(String row, String columnFamily, String columnQualifier, Value value) throws TableNotFoundException {