Skip to content

Commit

Permalink
Merge pull request #39 from NationalSecurityAgency/bugfix/issue-37
Browse files Browse the repository at this point in the history
Ensure Last Updated dates for Index Only fields are correct
  • Loading branch information
foster33 authored Jan 29, 2025
2 parents 05c02c3 + b668f5f commit 7099571
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -196,9 +196,8 @@ private void transformEntry(Map.Entry<Key,Value> currEntry) throws IOException,
if (currField.getFieldName() == null) {
setFieldNameAndAlias();
}

// Determine the lastUpdated value for index-only fields without including timestamps from description rows.
if (currField.isIndexOnly() && currField.getLastUpdated() == null && !isColumnFamly(ColumnFamilyConstants.COLF_DESC)) {
if (currField.isIndexOnly() && !isColumnFamly(ColumnFamilyConstants.COLF_DESC)) {
setLastUpdated();
}
}
Expand Down Expand Up @@ -277,8 +276,15 @@ private void setType() {

// Set the last updated date for the current {@link DefaultMetadataField} based on the timestamp of the current entry.
private void setLastUpdated() {
    // Render the current entry's timestamp in the system default zone using TIMESTAMP_FORMAT.
    String formattedCurrentKeyTimeStamp = Instant.ofEpochMilli(currKey.getTimestamp()).atZone(ZoneId.systemDefault()).toLocalDateTime()
                    .format(DateTimeFormatter.ofPattern(TIMESTAMP_FORMAT));

    String previousLastUpdated = currField.getLastUpdated();

    // Only move the last-updated value forward: take the current entry's timestamp when nothing has been
    // recorded yet, or when it is strictly newer than the recorded one. The field must NOT be overwritten
    // before this check, otherwise the comparison always sees equal values and the last entry scanned wins.
    // NOTE(review): the numeric comparison assumes TIMESTAMP_FORMAT yields digits only (e.g. yyyyMMddHHmmss) - confirm.
    if (previousLastUpdated == null || Long.parseLong(previousLastUpdated) < Long.parseLong(formattedCurrentKeyTimeStamp)) {
        currField.setLastUpdated(formattedCurrentKeyTimeStamp);
    }
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

import static org.assertj.core.api.Assertions.assertThat;

import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
Expand Down Expand Up @@ -42,6 +44,7 @@ public class DefaultMetadataFieldScannerTest {

private static final String DATE = "20200115051230";
private static final long TIMESTAMP = ZonedDateTime.of(LocalDateTime.of(2020, 1, 15, 5, 12, 30), ZoneId.systemDefault()).toInstant().toEpochMilli();
private static final long DAY_AS_MILLISECONDS = 86400000;
private static final String MODEL_TABLE = "modelTable";
private static final String METADATA_TABLE = "metadataTable";
private static final String[] AUTH = {"PRIVATE"};
Expand All @@ -66,6 +69,8 @@ public DefaultFields getFields() {

private AccumuloClient connector;

private Map<String,String> expectedTimestamps;

private DefaultMetadataFieldScanner scanner;

@BeforeEach
Expand All @@ -75,6 +80,9 @@ public void setUp() throws Exception {
connector.securityOperations().changeUserAuthorizations("root", new Authorizations(AUTH));
connector.tableOperations().create(METADATA_TABLE);
connector.tableOperations().create(MODEL_TABLE);

expectedTimestamps = new HashMap<>();

populateMetadataTable();

Map<String,String> normalizerMapping = new HashMap<>();
Expand Down Expand Up @@ -117,8 +125,18 @@ public void whenRetrievingFields_givenNoDataTypeFilters_shouldReturnUnfilteredRe
name.setTypes(Collections.singletonList("Unknown"));
name.setLastUpdated(DATE);

DefaultMetadataField fooToken = new DefaultMetadataField();
fooToken.setFieldName("FOO_TOKEN");
fooToken.setDataType("tvmaze");
fooToken.setForwardIndexed(true);
fooToken.setReverseIndexed(false);
fooToken.setTokenized(true);
fooToken.setIndexOnly(true);
fooToken.setLastUpdated("20200120051230");
fooToken.setTypes(Collections.singletonList("Text"));

Collection<DefaultMetadataField> fields = scanner.getFields(Collections.emptyMap(), Collections.emptySet());
assertThat(fields).containsExactlyInAnyOrder(barField, contributorId, name);
assertThat(fields).containsExactlyInAnyOrder(barField, contributorId, name, fooToken);
}

@Test
Expand Down Expand Up @@ -178,11 +196,34 @@ public void whenRetrievingFields_givenAliases_shouldReturnResultsWithAliases() t
name.setTypes(Collections.singletonList("Unknown"));
name.setLastUpdated(DATE);

DefaultMetadataField fooToken = new DefaultMetadataField();
fooToken.setFieldName("FOO_TOKEN");
fooToken.setDataType("tvmaze");
fooToken.setForwardIndexed(true);
fooToken.setReverseIndexed(false);
fooToken.setTokenized(true);
fooToken.setIndexOnly(true);
fooToken.setLastUpdated("20200120051230");
fooToken.setTypes(Collections.singletonList("Text"));

Map<String,String> aliases = new HashMap<>();
aliases.put("BAR_FIELD", "bar_field_alias");
aliases.put("CONTRIBUTOR_ID", "contributor_id_alias");
Collection<DefaultMetadataField> fields = scanner.getFields(aliases, Collections.emptySet());
assertThat(fields).containsExactlyInAnyOrder(barField, contributorId, name);
assertThat(fields).containsExactlyInAnyOrder(barField, contributorId, name, fooToken);
}

@Test
public void lastUpdatedTimeIsCorrect() throws Exception {
    Collection<DefaultMetadataField> fields = scanner.getFields(Collections.emptyMap(), Collections.emptySet());

    // Guard against a vacuous pass: with no fields returned, the loop below would assert nothing.
    assertThat(fields).isNotEmpty();

    // Every returned field must report the last-updated value recorded for it in expectedTimestamps.
    for (DefaultMetadataField field : fields) {
        assertThat(expectedTimestamps).containsEntry(field.getFieldName(), field.getLastUpdated());
    }
}

private String formatTimestamp(Long timestamp) {
    // Interpret the epoch-millisecond value in the system default zone and render it as yyyyMMddHHmmss.
    DateTimeFormatter pattern = DateTimeFormatter.ofPattern("yyyyMMddHHmmss");
    LocalDateTime localTime = LocalDateTime.ofInstant(Instant.ofEpochMilli(timestamp), ZoneId.systemDefault());
    return pattern.format(localTime);
}

private void populateMetadataTable() throws TableNotFoundException, MutationsRejectedException {
Expand All @@ -193,25 +234,41 @@ private void populateMetadataTable() throws TableNotFoundException, MutationsRej
barField.put(new Text(ColumnFamilyConstants.COLF_TF), new Text("csv"), TIMESTAMP, new Value());
barField.put(new Text(ColumnFamilyConstants.COLF_T), new Text("csv\0datawave.data.type.LcNoDiacriticsType"), TIMESTAMP, new Value());
barField.put(new Text(ColumnFamilyConstants.COLF_DESC), new Text("csv"), new ColumnVisibility("PRIVATE"), TIMESTAMP, new Value("Barfield Description"));
expectedTimestamps.put("BAR_FIELD", formatTimestamp(TIMESTAMP));

Mutation contributorId = new Mutation(new Text("CONTRIBUTOR_ID"));
contributorId.put(new Text(ColumnFamilyConstants.COLF_E), new Text("enwiki"), TIMESTAMP, new Value());
contributorId.put(new Text(ColumnFamilyConstants.COLF_I), new Text("enwiki"), TIMESTAMP, new Value());
contributorId.put(new Text(ColumnFamilyConstants.COLF_T), new Text("enwiki\0datawave.data.type.NumberType"), TIMESTAMP, new Value());
contributorId.put(new Text(ColumnFamilyConstants.COLF_DESC), new Text("enwiki"), new ColumnVisibility("PRIVATE"), TIMESTAMP,
new Value("ContributorId Description"));
expectedTimestamps.put("CONTRIBUTOR_ID", formatTimestamp(TIMESTAMP));

Mutation name = new Mutation(new Text("NAME"));
name.put(new Text(ColumnFamilyConstants.COLF_E), new Text("tvmaze"), TIMESTAMP, new Value());
name.put(new Text(ColumnFamilyConstants.COLF_I), new Text("tvmaze"), TIMESTAMP, new Value());
name.put(new Text(ColumnFamilyConstants.COLF_RI), new Text("tvmaze"), TIMESTAMP, new Value());
name.put(new Text(ColumnFamilyConstants.COLF_T), new Text("tvmaze\0not.a.known.type"), TIMESTAMP, new Value());
expectedTimestamps.put("NAME", formatTimestamp(TIMESTAMP));

Mutation fooToken = new Mutation(new Text("FOO_TOKEN"));
fooToken.put(new Text(ColumnFamilyConstants.COLF_I), new Text("tvmaze"), TIMESTAMP, new Value());
fooToken.put(new Text(ColumnFamilyConstants.COLF_I), new Text("tvmaze"), TIMESTAMP + (DAY_AS_MILLISECONDS * 2), new Value());
fooToken.put(new Text(ColumnFamilyConstants.COLF_I), new Text("tvmaze"), TIMESTAMP + (DAY_AS_MILLISECONDS * 3), new Value());
fooToken.put(new Text(ColumnFamilyConstants.COLF_I), new Text("tvmaze"), TIMESTAMP - (DAY_AS_MILLISECONDS * 2), new Value());
fooToken.put(new Text(ColumnFamilyConstants.COLF_I), new Text("tvmaze"), TIMESTAMP - (DAY_AS_MILLISECONDS * 3), new Value());
fooToken.put(new Text(ColumnFamilyConstants.COLF_I), new Text("tvmaze"), TIMESTAMP + (DAY_AS_MILLISECONDS * 4), new Value());
fooToken.put(new Text(ColumnFamilyConstants.COLF_I), new Text("tvmaze"), TIMESTAMP + (DAY_AS_MILLISECONDS * 5), new Value());
fooToken.put(new Text(ColumnFamilyConstants.COLF_TF), new Text("tvmaze"), TIMESTAMP, new Value());
fooToken.put(new Text(ColumnFamilyConstants.COLF_T), new Text("tvmaze\0datawave.data.type.LcNoDiacriticsType"), TIMESTAMP, new Value());
expectedTimestamps.put("FOO_TOKEN", formatTimestamp(TIMESTAMP + (DAY_AS_MILLISECONDS * 5)));

BatchWriterConfig bwConfig = new BatchWriterConfig().setMaxMemory(10L).setMaxLatency(1, TimeUnit.SECONDS).setMaxWriteThreads(1);
BatchWriter writer = connector.createBatchWriter(METADATA_TABLE, bwConfig);
writer.addMutation(barField);
writer.addMutation(contributorId);
writer.addMutation(name);
writer.addMutation(fooToken);
writer.flush();
writer.close();
}
Expand Down

0 comments on commit 7099571

Please sign in to comment.