Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for patterned model mappings #39

Merged
merged 9 commits into from
Nov 29, 2024
10 changes: 10 additions & 0 deletions src/main/java/datawave/query/model/FieldMapping.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import java.util.Collections;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
Expand Down Expand Up @@ -71,6 +73,14 @@ public void validate() {
if (fieldName == null || modelFieldName == null || columnVisibility == null) {
throw new IllegalArgumentException("Cannot have a model mapping with without all members: " + this);
}
// If this is a forward mapping, it's possible that a regex pattern is supplied for the field name. Verify that the field name compiles.
if (direction == Direction.FORWARD) {
try {
Pattern.compile(fieldName);
} catch (PatternSyntaxException e) {
throw new IllegalArgumentException("Invalid regex pattern supplied for field name: " + fieldName, e);
}
}
}
}

Expand Down
20 changes: 15 additions & 5 deletions src/main/java/datawave/query/util/MetadataHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.util.TreeSet;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

Expand Down Expand Up @@ -516,13 +517,22 @@ public QueryModel getQueryModel(String modelTableName, String modelName, Collect
if (!mapping.isFieldMapping()) {
queryModel.setModelFieldAttributes(mapping.getModelFieldName(), mapping.getAttributes());
} else if (mapping.getDirection() == Direction.FORWARD) {
// Do not add a forward mapping entry
// when the replacement does not exist in the database
// If a direct match is found for the field in the database, add a forward mapping entry.
if (allFields.contains(mapping.getFieldName())) {
queryModel.addTermToModel(mapping.getModelFieldName(), mapping.getFieldName());
} else if (log.isTraceEnabled()) {
log.trace("Ignoring forward mapping of {} for {} because the metadata table has no reference to it", mapping.getFieldName(),
mapping.getModelFieldName());
} else {
// If a direct match was not found for the field name, it's possible that a regex pattern was supplied. Attempt to find matches
// based off matching against the field name as a pattern.
Pattern pattern = Pattern.compile(mapping.getFieldName());
Set<String> matches = allFields.stream().filter(field -> pattern.matcher(field).matches()).collect(Collectors.toSet());
if (!matches.isEmpty()) {
matches.forEach(field -> queryModel.addTermToModel(mapping.getModelFieldName(), field));
} else {
if (log.isTraceEnabled()) {
log.trace("Ignoring forward mapping of {} for {} because the metadata table has no reference to it", mapping.getFieldName(),
mapping.getModelFieldName());
}
}
}
} else {
queryModel.addTermToReverseModel(mapping.getFieldName(), mapping.getModelFieldName());
Expand Down
35 changes: 35 additions & 0 deletions src/test/java/datawave/query/model/FieldMappingTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package datawave.query.model;

import java.util.Collections;

import org.junit.Test;
import org.junit.jupiter.api.Assertions;

/**
 * Tests for constructing {@link FieldMapping} instances whose forward-mapping field name may be either a plain field name or a regex pattern.
 */
public class FieldMappingTest {

    /**
     * Verify that creating a forward mapping with a regular field name does not result in any exceptions.
     */
    @Test
    public void testForwardMappingWithPlainFieldName() {
        Assertions.assertDoesNotThrow(() -> new FieldMapping("datatype", "DB_NAME", "FIELD_NAME", Direction.FORWARD, "ALL", Collections.emptySet()));
    }

    /**
     * Verify that creating a forward mapping with an invalid pattern for the field name results in an exception that names the offending pattern.
     */
    @Test
    public void testForwardMappingWithInvalidPatternFieldName() {
        // The optional third argument of assertThrows is only the assertion-failure message; it is never compared against the thrown exception.
        // Capture the exception and check its message explicitly instead.
        IllegalArgumentException exception = Assertions.assertThrows(IllegalArgumentException.class,
                        () -> new FieldMapping("datatype", "[\\]", "FIELD_NAME", Direction.FORWARD, "ALL", Collections.emptySet()));
        Assertions.assertEquals("Invalid regex pattern supplied for field name: [\\]", exception.getMessage());
    }

    /**
     * Verify that creating a forward mapping with a valid pattern for the field name does not result in an exception.
     */
    @Test
    public void testForwardMappingWithValidPatternFieldName() {
        Assertions.assertDoesNotThrow(() -> new FieldMapping("datatype", "DB_NAME.*", "FIELD_NAME", Direction.FORWARD, "ALL", Collections.emptySet()));
    }
}
74 changes: 58 additions & 16 deletions src/test/java/datawave/query/util/MetadataHelperTableTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ExecutionException;

import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.client.BatchWriter;
Expand All @@ -34,6 +35,7 @@
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.LongCombiner;
import org.apache.accumulo.core.security.Authorizations;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
Expand All @@ -47,6 +49,10 @@
import datawave.data.type.LcType;
import datawave.data.type.Type;
import datawave.query.composite.CompositeMetadataHelper;
import datawave.query.model.Direction;
import datawave.query.model.FieldMapping;
import datawave.query.model.ModelKeyParser;
import datawave.query.model.QueryModel;

/**
* Integration test for the {@link MetadataHelper}.
Expand Down Expand Up @@ -146,6 +152,25 @@ private static void writeData() throws Exception {
// write some 'counts'
MetadataCardinalityCounts counts = new MetadataCardinalityCounts("DEFINITION", "define", 23L, 34L, 45L, 56L, 67L, 78L);
write(bw, "DEFINITION", "count", "define", counts.getValue());

// Write a model.
bw.addMutation(ModelKeyParser.createMutation(new FieldMapping("", "EVENT_DATE", "start-time", Direction.FORWARD, "", Collections.emptySet()),
"TEST_MODEL"));
bw.addMutation(ModelKeyParser.createMutation(new FieldMapping("", "EVENT_DATE", "start-time", Direction.REVERSE, "", Collections.emptySet()),
"TEST_MODEL"));
bw.addMutation(ModelKeyParser.createMutation(new FieldMapping("", "UUID", "unique-id", Direction.FORWARD, "", Collections.emptySet()),
"TEST_MODEL"));
bw.addMutation(ModelKeyParser.createMutation(new FieldMapping("", "UUID", "unique-id", Direction.REVERSE, "", Collections.emptySet()),
"TEST_MODEL"));
// Test using regex patterns in forward matching model mappings.
bw.addMutation(ModelKeyParser.createMutation(
new FieldMapping("", "TITLE|HEADER|DESIGNATION", "title", Direction.FORWARD, "", Collections.emptySet()), "TEST_MODEL"));
// Make sure the model fields appear when fetching all fields.
write(bw, "EVENT_DATE", "i", "datatype-a", EMPTY_VALUE);
write(bw, "UUID", "i", "datatype-a", EMPTY_VALUE);
write(bw, "TITLE", "i", "datatype-a", EMPTY_VALUE);
write(bw, "HEADER", "i", "datatype-a", EMPTY_VALUE);
write(bw, "DESIGNATION", "i", "datatype-a", EMPTY_VALUE);
}
}

Expand Down Expand Up @@ -262,9 +287,9 @@ public void testGetMetadata() throws Exception {
Metadata metadata = helper.getMetadata();

Set<String> datatypes = Set.of("datatype-a", "datatype-b");
Set<String> fields = Set.of("SHAPE", "COLOR", "DEFINITION", "EVENT_ONLY");
Set<String> indexedFields = Set.of("SHAPE", "COLOR", "DEFINITION");
Set<String> indexOnlyFields = Set.of("DEFINITION");
Set<String> fields = Set.of("SHAPE", "COLOR", "DEFINITION", "EVENT_ONLY", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION");
Set<String> indexedFields = Set.of("SHAPE", "COLOR", "DEFINITION", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION");
Set<String> indexOnlyFields = Set.of("DEFINITION", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION");
Set<String> termFrequencyFields = Set.of("DEFINITION");

assertEquals(datatypes, metadata.getDatatypes());
Expand All @@ -281,9 +306,9 @@ public void testGetMetadataWithDatatypeFilter() throws Exception {
Metadata metadata = helper.getMetadata(filter);

Set<String> datatypes = Set.of("datatype-a");
Set<String> fields = Set.of("SHAPE", "DEFINITION", "EVENT_ONLY");
Set<String> indexedFields = Set.of("SHAPE", "DEFINITION");
Set<String> indexOnlyFields = Set.of("DEFINITION");
Set<String> fields = Set.of("SHAPE", "DEFINITION", "EVENT_ONLY", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION");
Set<String> indexedFields = Set.of("SHAPE", "DEFINITION", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION");
Set<String> indexOnlyFields = Set.of("DEFINITION", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION");
Set<String> termFrequencyFields = Set.of("DEFINITION");

assertEquals(datatypes, metadata.getDatatypes());
Expand All @@ -296,12 +321,12 @@ public void testGetMetadataWithDatatypeFilter() throws Exception {

@Test
public void testGetAllFields() throws Exception {
Set<String> expected = Set.of("SHAPE", "COLOR", "DEFINITION", "EVENT_ONLY");
Set<String> expected = Set.of("SHAPE", "COLOR", "DEFINITION", "EVENT_ONLY", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION");
assertEquals(expected, helper.getAllFields(Collections.emptySet()));

// and with filter
Set<String> filter = Collections.singleton("datatype-a");
expected = Set.of("SHAPE", "DEFINITION", "EVENT_ONLY");
expected = Set.of("SHAPE", "DEFINITION", "EVENT_ONLY", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION");
assertEquals(expected, helper.getAllFields(filter));
}

Expand All @@ -313,7 +338,7 @@ public void testGetEvaluationOnlyFields() {

@Test
public void testGetNonEventFields() throws Exception {
Set<String> expected = Set.of("DEFINITION");
Set<String> expected = Set.of("DEFINITION", "EVENT_DATE", "HEADER", "TITLE", "UUID", "DESIGNATION");
assertEquals(expected, helper.getNonEventFields(Collections.emptySet()));

// then restrict the filter
Expand All @@ -339,16 +364,15 @@ public void testIsOverloadedCompositeField() {

@Test
public void testGetIndexOnlyFields() throws Exception {
Set<String> expected = Set.of("DEFINITION");
Set<String> expected = Set.of("DEFINITION", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION");
assertEquals(expected, helper.getIndexOnlyFields(Collections.emptySet()));

// restrict filter
Set<String> filter = Collections.singleton("datatype-b");
expected = Set.of("DEFINITION");
assertEquals(expected, helper.getIndexOnlyFields(filter));
}

// skipping query model methods

@Test
public void testIsReverseIndexed() throws Exception {
// SHAPE is reverse indexed for datatype-a
Expand Down Expand Up @@ -575,10 +599,10 @@ public void testGetTermFrequencyFields() throws Exception {

@Test
public void testGetIndexedFields() throws Exception {
Set<String> indexedFields = Set.of("SHAPE", "COLOR", "DEFINITION");
Set<String> indexedFields = Set.of("SHAPE", "COLOR", "DEFINITION", "EVENT_DATE", "DESIGNATION", "HEADER", "TITLE", "UUID");
assertEquals(indexedFields, helper.getIndexedFields(null));
assertEquals(indexedFields, helper.getIndexedFields(Collections.emptySet()));
assertEquals(Set.of("SHAPE", "DEFINITION"), helper.getIndexedFields(Set.of("datatype-a")));
assertEquals(Set.of("SHAPE", "DEFINITION", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION"), helper.getIndexedFields(Set.of("datatype-a")));
assertEquals(Set.of("COLOR", "DEFINITION"), helper.getIndexedFields(Set.of("datatype-b")));
assertEquals(Collections.emptySet(), helper.getIndexedFields(Set.of("datatype-c")));
}
Expand Down Expand Up @@ -797,15 +821,15 @@ public void testUniqueDatatypesFunction() {
public void testLoadAllFields() throws Exception {
Multimap<String,String> fields = helper.loadAllFields();
assertEquals(Set.of("datatype-a", "datatype-b"), fields.keySet());
assertEquals(Set.of("SHAPE", "DEFINITION", "EVENT_ONLY"), fields.get("datatype-a"));
assertEquals(Set.of("SHAPE", "DEFINITION", "EVENT_ONLY", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION"), fields.get("datatype-a"));
assertEquals(Set.of("COLOR", "DEFINITION"), fields.get("datatype-b"));
}

@Test
public void testLoadIndexOnlyFields() throws Exception {
Multimap<String,String> fields = helper.loadIndexOnlyFields();
assertEquals(Set.of("datatype-a", "datatype-b"), fields.keySet());
assertEquals(Set.of("DEFINITION"), fields.get("datatype-a"));
assertEquals(Set.of("DEFINITION", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION"), fields.get("datatype-a"));
assertEquals(Set.of("DEFINITION"), fields.get("datatype-b"));
}

Expand Down Expand Up @@ -883,6 +907,24 @@ public void testInternalTypeCache() throws TableNotFoundException, Instantiation
assertNotSame(typesFirstCall.iterator().next(), typesSecondCall.iterator().next());
}

/**
 * Verify that the query model loaded for {@code TEST_MODEL} contains the expected forward mappings, including every field matched by the
 * {@code TITLE|HEADER|DESIGNATION} pattern, and the expected reverse mappings.
 */
@Test
public void testGetQueryModel() throws TableNotFoundException, ExecutionException {
    QueryModel queryModel = helper.getQueryModel(METADATA_TABLE_NAME, "TEST_MODEL");

    // Assert the forward mappings.
    Multimap<String,String> forwardMappings = queryModel.getForwardQueryMapping();
    Assertions.assertTrue(forwardMappings.containsEntry("start-time", "EVENT_DATE"));
    Assertions.assertTrue(forwardMappings.containsEntry("unique-id", "UUID"));
    // The "title" model field was written with a regex alternation, so each matching database field should be mapped individually.
    Assertions.assertTrue(forwardMappings.containsEntry("title", "TITLE"));
    Assertions.assertTrue(forwardMappings.containsEntry("title", "HEADER"));
    Assertions.assertTrue(forwardMappings.containsEntry("title", "DESIGNATION"));

    // Assert the reverse mappings. Expected value goes first so failure messages report expected/actual correctly.
    Map<String,String> reverseMappings = queryModel.getReverseQueryMapping();
    Assertions.assertEquals("start-time", reverseMappings.get("EVENT_DATE"));
    Assertions.assertEquals("unique-id", reverseMappings.get("UUID"));
}

/**
* Assert that an iterable matches expectations
*
Expand Down