Skip to content

Commit

Permalink
Add support for patterned model mappings (#39)
Browse files Browse the repository at this point in the history
* Support using patterns when building the forward model mappings for a
query model.
Fixes #2429
  • Loading branch information
lbschanno authored Nov 29, 2024
1 parent 6169ce3 commit 51f1307
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 21 deletions.
10 changes: 10 additions & 0 deletions src/main/java/datawave/query/model/FieldMapping.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import java.util.Collections;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
Expand Down Expand Up @@ -71,6 +73,14 @@ public void validate() {
if (fieldName == null || modelFieldName == null || columnVisibility == null) {
throw new IllegalArgumentException("Cannot have a model mapping with without all members: " + this);
}
// If this is a forward mapping, it's possible that a regex pattern is supplied for the field name. Verify that the field name compiles.
if (direction == Direction.FORWARD) {
try {
Pattern.compile(fieldName);
} catch (PatternSyntaxException e) {
throw new IllegalArgumentException("Invalid regex pattern supplied for field name: " + fieldName, e);
}
}
}
}

Expand Down
20 changes: 15 additions & 5 deletions src/main/java/datawave/query/util/MetadataHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.util.TreeSet;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

Expand Down Expand Up @@ -516,13 +517,22 @@ public QueryModel getQueryModel(String modelTableName, String modelName, Collect
if (!mapping.isFieldMapping()) {
queryModel.setModelFieldAttributes(mapping.getModelFieldName(), mapping.getAttributes());
} else if (mapping.getDirection() == Direction.FORWARD) {
// Do not add a forward mapping entry
// when the replacement does not exist in the database
// If a direct match is found for the field in the database, add a forward mapping entry.
if (allFields.contains(mapping.getFieldName())) {
queryModel.addTermToModel(mapping.getModelFieldName(), mapping.getFieldName());
} else if (log.isTraceEnabled()) {
log.trace("Ignoring forward mapping of {} for {} because the metadata table has no reference to it", mapping.getFieldName(),
mapping.getModelFieldName());
} else {
// If a direct match was not found for the field name, it's possible that a regex pattern was supplied. Attempt to find matches
// based off matching against the field name as a pattern.
Pattern pattern = Pattern.compile(mapping.getFieldName());
Set<String> matches = allFields.stream().filter(field -> pattern.matcher(field).matches()).collect(Collectors.toSet());
if (!matches.isEmpty()) {
matches.forEach(field -> queryModel.addTermToModel(mapping.getModelFieldName(), field));
} else {
if (log.isTraceEnabled()) {
log.trace("Ignoring forward mapping of {} for {} because the metadata table has no reference to it", mapping.getFieldName(),
mapping.getModelFieldName());
}
}
}
} else {
queryModel.addTermToReverseModel(mapping.getFieldName(), mapping.getModelFieldName());
Expand Down
35 changes: 35 additions & 0 deletions src/test/java/datawave/query/model/FieldMappingTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package datawave.query.model;

import java.util.Collections;

import org.junit.Test;
import org.junit.jupiter.api.Assertions;

/**
 * Unit tests for the field-name validation performed when a {@link FieldMapping} is constructed with {@link Direction#FORWARD}.
 */
public class FieldMappingTest {

    /**
     * Verify that creating a forward mapping with a regular field name does not result in any exceptions.
     */
    @Test
    public void testForwardMappingWithPlainFieldName() {
        Assertions.assertDoesNotThrow(() -> new FieldMapping("datatype", "DB_NAME", "FIELD_NAME", Direction.FORWARD, "ALL", Collections.emptySet()));
    }

    /**
     * Verify that creating a forward mapping with an invalid pattern for the field name results in an {@link IllegalArgumentException} whose message
     * identifies the offending field name.
     */
    @Test
    public void testForwardMappingWithInvalidPatternFieldName() {
        // The optional third argument of assertThrows is only the assertion failure message, so capture the thrown exception and check its message
        // explicitly.
        IllegalArgumentException thrown = Assertions.assertThrows(IllegalArgumentException.class,
                        () -> new FieldMapping("datatype", "[\\]", "FIELD_NAME", Direction.FORWARD, "ALL", Collections.emptySet()));
        Assertions.assertEquals("Invalid regex pattern supplied for field name: [\\]", thrown.getMessage());
    }

    /**
     * Verify that creating a forward mapping with a valid pattern for the field name does not result in an exception.
     */
    @Test
    public void testForwardMappingWithValidPatternFieldName() {
        Assertions.assertDoesNotThrow(() -> new FieldMapping("datatype", "DB_NAME.*", "FIELD_NAME", Direction.FORWARD, "ALL", Collections.emptySet()));
    }
}
74 changes: 58 additions & 16 deletions src/test/java/datawave/query/util/MetadataHelperTableTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ExecutionException;

import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.client.BatchWriter;
Expand All @@ -34,6 +35,7 @@
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.LongCombiner;
import org.apache.accumulo.core.security.Authorizations;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
Expand All @@ -47,6 +49,10 @@
import datawave.data.type.LcType;
import datawave.data.type.Type;
import datawave.query.composite.CompositeMetadataHelper;
import datawave.query.model.Direction;
import datawave.query.model.FieldMapping;
import datawave.query.model.ModelKeyParser;
import datawave.query.model.QueryModel;

/**
* Integration test for the {@link MetadataHelper}.
Expand Down Expand Up @@ -146,6 +152,25 @@ private static void writeData() throws Exception {
// write some 'counts'
MetadataCardinalityCounts counts = new MetadataCardinalityCounts("DEFINITION", "define", 23L, 34L, 45L, 56L, 67L, 78L);
write(bw, "DEFINITION", "count", "define", counts.getValue());

// Write a model.
bw.addMutation(ModelKeyParser.createMutation(new FieldMapping("", "EVENT_DATE", "start-time", Direction.FORWARD, "", Collections.emptySet()),
"TEST_MODEL"));
bw.addMutation(ModelKeyParser.createMutation(new FieldMapping("", "EVENT_DATE", "start-time", Direction.REVERSE, "", Collections.emptySet()),
"TEST_MODEL"));
bw.addMutation(ModelKeyParser.createMutation(new FieldMapping("", "UUID", "unique-id", Direction.FORWARD, "", Collections.emptySet()),
"TEST_MODEL"));
bw.addMutation(ModelKeyParser.createMutation(new FieldMapping("", "UUID", "unique-id", Direction.REVERSE, "", Collections.emptySet()),
"TEST_MODEL"));
// Test using regex patterns in forward matching model mappings.
bw.addMutation(ModelKeyParser.createMutation(
new FieldMapping("", "TITLE|HEADER|DESIGNATION", "title", Direction.FORWARD, "", Collections.emptySet()), "TEST_MODEL"));
// Make sure the model fields appear when fetching all fields.
write(bw, "EVENT_DATE", "i", "datatype-a", EMPTY_VALUE);
write(bw, "UUID", "i", "datatype-a", EMPTY_VALUE);
write(bw, "TITLE", "i", "datatype-a", EMPTY_VALUE);
write(bw, "HEADER", "i", "datatype-a", EMPTY_VALUE);
write(bw, "DESIGNATION", "i", "datatype-a", EMPTY_VALUE);
}
}

Expand Down Expand Up @@ -262,9 +287,9 @@ public void testGetMetadata() throws Exception {
Metadata metadata = helper.getMetadata();

Set<String> datatypes = Set.of("datatype-a", "datatype-b");
Set<String> fields = Set.of("SHAPE", "COLOR", "DEFINITION", "EVENT_ONLY");
Set<String> indexedFields = Set.of("SHAPE", "COLOR", "DEFINITION");
Set<String> indexOnlyFields = Set.of("DEFINITION");
Set<String> fields = Set.of("SHAPE", "COLOR", "DEFINITION", "EVENT_ONLY", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION");
Set<String> indexedFields = Set.of("SHAPE", "COLOR", "DEFINITION", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION");
Set<String> indexOnlyFields = Set.of("DEFINITION", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION");
Set<String> termFrequencyFields = Set.of("DEFINITION");

assertEquals(datatypes, metadata.getDatatypes());
Expand All @@ -281,9 +306,9 @@ public void testGetMetadataWithDatatypeFilter() throws Exception {
Metadata metadata = helper.getMetadata(filter);

Set<String> datatypes = Set.of("datatype-a");
Set<String> fields = Set.of("SHAPE", "DEFINITION", "EVENT_ONLY");
Set<String> indexedFields = Set.of("SHAPE", "DEFINITION");
Set<String> indexOnlyFields = Set.of("DEFINITION");
Set<String> fields = Set.of("SHAPE", "DEFINITION", "EVENT_ONLY", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION");
Set<String> indexedFields = Set.of("SHAPE", "DEFINITION", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION");
Set<String> indexOnlyFields = Set.of("DEFINITION", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION");
Set<String> termFrequencyFields = Set.of("DEFINITION");

assertEquals(datatypes, metadata.getDatatypes());
Expand All @@ -296,12 +321,12 @@ public void testGetMetadataWithDatatypeFilter() throws Exception {

@Test
public void testGetAllFields() throws Exception {
Set<String> expected = Set.of("SHAPE", "COLOR", "DEFINITION", "EVENT_ONLY");
Set<String> expected = Set.of("SHAPE", "COLOR", "DEFINITION", "EVENT_ONLY", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION");
assertEquals(expected, helper.getAllFields(Collections.emptySet()));

// and with filter
Set<String> filter = Collections.singleton("datatype-a");
expected = Set.of("SHAPE", "DEFINITION", "EVENT_ONLY");
expected = Set.of("SHAPE", "DEFINITION", "EVENT_ONLY", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION");
assertEquals(expected, helper.getAllFields(filter));
}

Expand All @@ -313,7 +338,7 @@ public void testGetEvaluationOnlyFields() {

@Test
public void testGetNonEventFields() throws Exception {
Set<String> expected = Set.of("DEFINITION");
Set<String> expected = Set.of("DEFINITION", "EVENT_DATE", "HEADER", "TITLE", "UUID", "DESIGNATION");
assertEquals(expected, helper.getNonEventFields(Collections.emptySet()));

// then restrict the filter
Expand All @@ -339,16 +364,15 @@ public void testIsOverloadedCompositeField() {

@Test
public void testGetIndexOnlyFields() throws Exception {
Set<String> expected = Set.of("DEFINITION");
Set<String> expected = Set.of("DEFINITION", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION");
assertEquals(expected, helper.getIndexOnlyFields(Collections.emptySet()));

// restrict filter
Set<String> filter = Collections.singleton("datatype-b");
expected = Set.of("DEFINITION");
assertEquals(expected, helper.getIndexOnlyFields(filter));
}

// skipping query model methods

@Test
public void testIsReverseIndexed() throws Exception {
// SHAPE is reverse indexed for datatype-a
Expand Down Expand Up @@ -575,10 +599,10 @@ public void testGetTermFrequencyFields() throws Exception {

@Test
public void testGetIndexedFields() throws Exception {
Set<String> indexedFields = Set.of("SHAPE", "COLOR", "DEFINITION");
Set<String> indexedFields = Set.of("SHAPE", "COLOR", "DEFINITION", "EVENT_DATE", "DESIGNATION", "HEADER", "TITLE", "UUID");
assertEquals(indexedFields, helper.getIndexedFields(null));
assertEquals(indexedFields, helper.getIndexedFields(Collections.emptySet()));
assertEquals(Set.of("SHAPE", "DEFINITION"), helper.getIndexedFields(Set.of("datatype-a")));
assertEquals(Set.of("SHAPE", "DEFINITION", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION"), helper.getIndexedFields(Set.of("datatype-a")));
assertEquals(Set.of("COLOR", "DEFINITION"), helper.getIndexedFields(Set.of("datatype-b")));
assertEquals(Collections.emptySet(), helper.getIndexedFields(Set.of("datatype-c")));
}
Expand Down Expand Up @@ -797,15 +821,15 @@ public void testUniqueDatatypesFunction() {
public void testLoadAllFields() throws Exception {
Multimap<String,String> fields = helper.loadAllFields();
assertEquals(Set.of("datatype-a", "datatype-b"), fields.keySet());
assertEquals(Set.of("SHAPE", "DEFINITION", "EVENT_ONLY"), fields.get("datatype-a"));
assertEquals(Set.of("SHAPE", "DEFINITION", "EVENT_ONLY", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION"), fields.get("datatype-a"));
assertEquals(Set.of("COLOR", "DEFINITION"), fields.get("datatype-b"));
}

@Test
public void testLoadIndexOnlyFields() throws Exception {
Multimap<String,String> fields = helper.loadIndexOnlyFields();
assertEquals(Set.of("datatype-a", "datatype-b"), fields.keySet());
assertEquals(Set.of("DEFINITION"), fields.get("datatype-a"));
assertEquals(Set.of("DEFINITION", "EVENT_DATE", "TITLE", "UUID", "HEADER", "DESIGNATION"), fields.get("datatype-a"));
assertEquals(Set.of("DEFINITION"), fields.get("datatype-b"));
}

Expand Down Expand Up @@ -883,6 +907,24 @@ public void testInternalTypeCache() throws TableNotFoundException, Instantiation
assertNotSame(typesFirstCall.iterator().next(), typesSecondCall.iterator().next());
}

/**
 * Verify that the query model named {@code TEST_MODEL} is loaded with the expected forward and reverse mappings, including the forward mappings for the
 * model field {@code title}, which is written with the field name pattern {@code TITLE|HEADER|DESIGNATION}.
 *
 * @throws TableNotFoundException
 *             declared by {@code getQueryModel}
 * @throws ExecutionException
 *             declared by {@code getQueryModel}
 */
@Test
public void testGetQueryModel() throws TableNotFoundException, ExecutionException {
    QueryModel queryModel = helper.getQueryModel(METADATA_TABLE_NAME, "TEST_MODEL");

    // Assert the forward mappings (model field name -> database field name).
    Multimap<String,String> forwardMappings = queryModel.getForwardQueryMapping();
    Assertions.assertTrue(forwardMappings.containsEntry("start-time", "EVENT_DATE"));
    Assertions.assertTrue(forwardMappings.containsEntry("unique-id", "UUID"));
    // Each field matched by the pattern mapping should be present under the same model field.
    Assertions.assertTrue(forwardMappings.containsEntry("title", "TITLE"));
    Assertions.assertTrue(forwardMappings.containsEntry("title", "HEADER"));
    Assertions.assertTrue(forwardMappings.containsEntry("title", "DESIGNATION"));

    // Assert the reverse mappings (database field name -> model field name). The expected value goes first so failure messages read correctly.
    Map<String,String> reverseMappings = queryModel.getReverseQueryMapping();
    Assertions.assertEquals("start-time", reverseMappings.get("EVENT_DATE"));
    Assertions.assertEquals("unique-id", reverseMappings.get("UUID"));
}

/**
* Assert that an iterable matches expectations
*
Expand Down

0 comments on commit 51f1307

Please sign in to comment.