-
Notifications
You must be signed in to change notification settings - Fork 18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feature/serialization minimap #23
Changes from 32 commits
c413d85
afb6e5e
2a61d7e
0a07c22
2e7fa76
02dbeb5
7f7602c
2260602
4fc2d1e
117df42
bab744a
6e5d9d6
0ea1e9e
fea8721
ecc74cd
898f404
1048cab
1a4f8f3
359f647
d500bb9
afb24ab
ec2511a
4a4dc02
cb6551e
a0664ab
167b1c9
d69c97f
a7f6893
747f013
b1b87b2
0fa5693
f692d8a
66d34f4
e0fb91d
ac4623a
f1148a3
3646dad
5448b22
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,50 +1,89 @@ | ||
package datawave.query.util; | ||
|
||
import com.google.common.base.Splitter; | ||
import com.google.common.collect.HashMultimap; | ||
import com.google.common.collect.ImmutableMap; | ||
import com.google.common.collect.ImmutableMultimap; | ||
import com.google.common.collect.Iterables; | ||
import com.google.common.collect.Lists; | ||
import com.google.common.collect.Maps; | ||
import com.google.common.collect.Multimap; | ||
import com.google.common.collect.Sets; | ||
|
||
import java.io.ObjectInputStream; | ||
import java.io.ObjectOutputStream; | ||
import java.io.Serializable; | ||
import java.util.Collection; | ||
import java.util.Collections; | ||
import java.util.HashSet; | ||
import java.util.Iterator; | ||
import java.util.LinkedHashSet; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Map.Entry; | ||
import java.util.Set; | ||
|
||
import com.google.common.base.Splitter; | ||
import com.google.common.collect.HashMultimap; | ||
import com.google.common.collect.ImmutableMultimap; | ||
import com.google.common.collect.Iterables; | ||
import com.google.common.collect.Lists; | ||
import com.google.common.collect.Maps; | ||
import com.google.common.collect.Multimap; | ||
import com.google.common.collect.Sets; | ||
import java.util.TreeMap; | ||
import java.util.TreeSet; | ||
import java.util.stream.Collectors; | ||
|
||
public class TypeMetadata implements Serializable { | ||
|
||
private Set<String> ingestTypes = Sets.newHashSet(); | ||
private Set<String> ingestTypes = new TreeSet<>(); | ||
|
||
private Set<String> fieldNames = Sets.newHashSet(); | ||
private Set<String> fieldNames = new TreeSet<>(); | ||
|
||
public Map<String,Integer> getIngestTypesMiniMap() { | ||
return ingestTypesMiniMap; | ||
} | ||
|
||
public void setIngestTypesMiniMap(Map<String,Integer> ingestTypesMiniMap) { | ||
this.ingestTypesMiniMap = ingestTypesMiniMap; | ||
} | ||
|
||
public Map<String,Integer> getDataTypesMiniMap() { | ||
return dataTypesMiniMap; | ||
} | ||
|
||
public void setDataTypesMiniMap(Map<String,Integer> dataTypesMiniMap) { | ||
this.dataTypesMiniMap = dataTypesMiniMap; | ||
} | ||
|
||
// Lookup tables used by the string form: each maps a type name to the integer index that stands in for it.
private Map<String,Integer> ingestTypesMiniMap; | ||
private Map<String,Integer> dataTypesMiniMap; | ||
|
||
// <ingestType, <fieldName, DataType(s)>> | ||
protected Map<String,Multimap<String,String>> typeMetadata; | ||
|
||
public static final Multimap<String,String> emptyMap = HashMultimap.create(); | ||
|
||
// Prefixes that identify the ingest-type ("dts") and data-type ("types") mini-map entries when the serialized string is parsed.
private static final String INGESTTYPE_PREFIX = "dts"; | ||
private static final String DATATYPES_PREFIX = "types"; | ||
|
||
/**
 * Creates an empty instance: no type metadata and two empty, sorted mini-maps.
 */
public TypeMetadata() {
    this.ingestTypesMiniMap = new TreeMap<>();
    this.dataTypesMiniMap = new TreeMap<>();
    this.typeMetadata = Maps.newHashMap();
}
|
||
/**
 * Creates an instance populated from its serialized string form.
 *
 * @param in
 *            the serialized representation to parse
 */
public TypeMetadata(String in) {
    // start from the same empty state as the no-arg constructor, then load the data
    this();
    fromString(in);
}
|
||
public TypeMetadata(TypeMetadata in) { | ||
typeMetadata = Maps.newHashMap(); | ||
ingestTypesMiniMap = new TreeMap<>(); | ||
dataTypesMiniMap = new TreeMap<>(); | ||
// make sure we do a deep copy to avoid access issues later | ||
for (Map.Entry<String,Multimap<String,String>> entry : in.typeMetadata.entrySet()) { | ||
for (Entry<String,Multimap<String,String>> entry : in.typeMetadata.entrySet()) { | ||
this.typeMetadata.put(entry.getKey(), HashMultimap.create(entry.getValue())); | ||
} | ||
this.ingestTypes.addAll(in.ingestTypes); | ||
this.fieldNames.addAll(in.fieldNames); | ||
this.ingestTypesMiniMap.putAll(in.getIngestTypesMiniMap()); | ||
this.dataTypesMiniMap.putAll(in.getDataTypesMiniMap()); | ||
} | ||
|
||
/** | ||
|
@@ -223,7 +262,7 @@ public TypeMetadata filter(Set<String> datatypeFilter) { | |
} | ||
|
||
TypeMetadata typeMetadata = new TypeMetadata(); | ||
typeMetadata.ingestTypes = datatypeFilter; | ||
typeMetadata.ingestTypes.addAll(datatypeFilter); | ||
typeMetadata.typeMetadata.putAll(localMap); | ||
return typeMetadata; | ||
} | ||
|
@@ -232,100 +271,150 @@ public boolean isEmpty() { | |
return this.keySet().isEmpty(); | ||
} | ||
|
||
private static String[] parse(String in, char c) { | ||
List<String> list = Lists.newArrayList(); | ||
boolean inside = false; | ||
int start = 0; | ||
for (int i = 0; i < in.length(); i++) { | ||
if (in.charAt(i) == '[') | ||
inside = true; | ||
if (in.charAt(i) == ']') | ||
inside = false; | ||
if (in.charAt(i) == c && !inside) { | ||
list.add(in.substring(start, i)); | ||
start = i + 1; | ||
} | ||
} | ||
list.add(in.substring(start)); | ||
return Iterables.toArray(list, String.class); | ||
} | ||
|
||
private static Map<String,Integer> parseTypes(String typeEntry) { | ||
// dts:[0:ingest1,1:ingest2] | ||
// types:[0:DateType,1:IntegerType,2:LcType] | ||
|
||
// remove type designation and leading/trailing brackets | ||
String types = typeEntry.split(":\\[")[1]; | ||
String typeEntries = types.substring(0, types.length() - 1); | ||
|
||
Map<String,Integer> typeMap = new TreeMap<>(); | ||
|
||
for (String entry : typeEntries.split(",")) { | ||
String[] entryParts = entry.split(":"); | ||
typeMap.put(entryParts[1], Integer.valueOf(entryParts[0])); | ||
} | ||
|
||
return typeMap; | ||
} | ||
|
||
public String toString() { | ||
StringBuilder sb = new StringBuilder(); | ||
|
||
Set<String> fieldNames = Sets.newHashSet(); | ||
for (String ingestType : typeMetadata.keySet()) { | ||
// create and append ingestTypes mini-map | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm looking at the minimap for one of the tests and I'm wondering if the serialization schema can be simplified (and by extension, if the serialization/deserialization code can be simplified). Take this for example:
Serialization should be simpler as well. It's calling |
||
sb.append("dts:["); | ||
Iterator<String> ingestIter = ingestTypes.iterator(); | ||
for (int i = 0; i < ingestTypes.size(); i++) { | ||
String ingestType = ingestIter.next(); | ||
sb.append(i).append(":"); | ||
sb.append(ingestType); | ||
sb.append(ingestIter.hasNext() ? "," : "];"); | ||
getIngestTypesMiniMap().put(ingestType, i); | ||
} | ||
|
||
// create and append dataTypes mini-map | ||
sb.append("types:["); | ||
Iterator<Multimap<String,String>> typesIter = typeMetadata.values().iterator(); | ||
Set<String> dataTypes = new TreeSet<>(); | ||
while (typesIter.hasNext()) { | ||
dataTypes.addAll(typesIter.next().values()); | ||
} | ||
|
||
Iterator<String> dataIter = dataTypes.iterator(); | ||
for (int i = 0; i < dataTypes.size(); i++) { | ||
String dataType = dataIter.next(); | ||
sb.append(i).append(":"); | ||
sb.append(dataType); | ||
sb.append(dataIter.hasNext() ? "," : "];"); | ||
getDataTypesMiniMap().put(dataType, i); | ||
} | ||
|
||
// append fieldNames and their associated ingestTypes and Normalizers | ||
// ensure ordering for ease of type -> mini-map mapping | ||
Set<String> fieldNames = new TreeSet<>(); | ||
Set<String> ingestTypes = typeMetadata.keySet().stream().sorted().collect(Collectors.toCollection(LinkedHashSet::new)); | ||
for (String ingestType : ingestTypes) { | ||
fieldNames.addAll(typeMetadata.get(ingestType).keySet()); | ||
} | ||
|
||
for (String fieldName : fieldNames) { | ||
if (sb.length() > 0) { | ||
sb.append(';'); | ||
} | ||
|
||
sb.append(fieldName).append(':'); | ||
sb.append('['); | ||
boolean firstField = true; | ||
for (String ingestType : typeMetadata.keySet()) { | ||
if (!typeMetadata.get(ingestType).containsKey(fieldName)) | ||
Iterator<String> fieldIter = fieldNames.iterator(); | ||
while (fieldIter.hasNext()) { | ||
String fieldName = fieldIter.next(); | ||
sb.append(fieldName).append(":["); | ||
Iterator<String> iIter = ingestTypes.iterator(); | ||
while (iIter.hasNext()) { | ||
String ingestType = iIter.next(); | ||
if (!typeMetadata.get(ingestType).containsKey(fieldName)) { | ||
continue; | ||
if (!firstField) | ||
sb.append(';'); | ||
firstField = false; | ||
sb.append(ingestType); | ||
sb.append(':'); | ||
boolean first = true; | ||
for (String type : typeMetadata.get(ingestType).get(fieldName)) { | ||
if (!first) | ||
sb.append(','); | ||
sb.append(type); | ||
first = false; | ||
} | ||
for (String dataType : typeMetadata.get(ingestType).get(fieldName)) { | ||
sb.append(getIngestTypesMiniMap().get(ingestType)).append(':'); | ||
sb.append(getDataTypesMiniMap().get(dataType)); | ||
} | ||
sb.append(iIter.hasNext() ? "," : ""); | ||
} | ||
sb.append(']'); | ||
sb.append(fieldIter.hasNext() ? "];" : "]"); | ||
} | ||
|
||
return sb.toString(); | ||
} | ||
|
||
private void fromString(String data) { | ||
// was: | ||
// field1:a,b;field2:d,e;field3:y,z | ||
|
||
// post-fix: String should look like this: | ||
// field1:[type1:a,b;type2:b];field2:[type1:a,b;type2:a,c] | ||
fieldNames = Sets.newHashSet(); | ||
String[] entries = parse(data, ';'); | ||
for (String entry : entries) { | ||
String[] entrySplits = parse(entry, ':'); | ||
if (2 != entrySplits.length) { | ||
// Do nothing | ||
} else { | ||
// entrySplits[1] looks like this: | ||
// [type1:a,b;type2:b] - split it on the ';' | ||
// get rid of the leading and trailing brackets: | ||
entrySplits[1] = entrySplits[1].substring(1, entrySplits[1].length() - 1); | ||
String[] values = parse(entrySplits[1], ';'); | ||
|
||
for (String value : values) { | ||
|
||
String[] vs = Iterables.toArray(Splitter.on(':').omitEmptyStrings().trimResults().split(value), String.class); | ||
|
||
if (entries.length > 2) { | ||
for (String entry : entries) { | ||
if (entry.startsWith(INGESTTYPE_PREFIX)) { | ||
setIngestTypesMiniMap(parseTypes(entry)); | ||
} else if (entry.startsWith(DATATYPES_PREFIX)) { | ||
setDataTypesMiniMap(parseTypes(entry)); | ||
} else { | ||
String[] entrySplits = parse(entry, ':'); | ||
|
||
Multimap<String,String> mm = typeMetadata.get(vs[0]); | ||
if (null == mm) { | ||
mm = HashMultimap.create(); | ||
typeMetadata.put(vs[0], mm); | ||
} | ||
// get rid of the leading and trailing brackets: | ||
entrySplits[1] = entrySplits[1].substring(1, entrySplits[1].length() - 1); | ||
String[] values = parse(entrySplits[1], ','); | ||
|
||
String[] rhs = Iterables.toArray(Splitter.on(',').omitEmptyStrings().trimResults().split(vs[1]), String.class); | ||
this.ingestTypes.add(vs[0]); | ||
for (String r : rhs) { | ||
mm.put(entrySplits[0], r); | ||
for (String aValue : values) { | ||
if (!aValue.isEmpty()) { // ignore last entry for trailing comma | ||
// @formatter:off | ||
String[] vs = Iterables | ||
.toArray(Splitter.on(':') | ||
.omitEmptyStrings() | ||
.trimResults() | ||
.split(aValue), String.class); | ||
|
||
String ingestType = ImmutableMap.copyOf(getIngestTypesMiniMap()) | ||
.entrySet() | ||
.stream() | ||
.filter(e -> e.getValue().equals(Integer.valueOf(vs[0]))) | ||
.map(Entry::getKey) | ||
.findFirst().get(); | ||
|
||
String dataType = ImmutableMap.copyOf(getDataTypesMiniMap()) | ||
.entrySet() | ||
.stream() | ||
.filter(e -> e.getValue().equals(Integer.valueOf(vs[1]))) | ||
.map(Entry::getKey) | ||
.findFirst().get(); | ||
// @formatter:on | ||
|
||
this.addTypeMetadata(entrySplits[0], ingestType, dataType); | ||
} | ||
} | ||
fieldNames.add(entrySplits[0]); | ||
} | ||
fieldNames.add(entrySplits[0]); | ||
} | ||
} | ||
} | ||
|
||
private static String[] parse(String in, char c) { | ||
List<String> list = Lists.newArrayList(); | ||
boolean inside = false; | ||
int start = 0; | ||
for (int i = 0; i < in.length(); i++) { | ||
if (in.charAt(i) == '[') | ||
inside = true; | ||
if (in.charAt(i) == ']') | ||
inside = false; | ||
if (in.charAt(i) == c && !inside) { | ||
list.add(in.substring(start, i)); | ||
start = i + 1; | ||
} | ||
} | ||
list.add(in.substring(start)); | ||
return Iterables.toArray(list, String.class); | ||
} | ||
|
||
@Override | ||
|
@@ -346,20 +435,18 @@ public boolean equals(Object obj) { | |
return false; | ||
TypeMetadata other = (TypeMetadata) obj; | ||
if (typeMetadata == null) { | ||
if (other.typeMetadata != null) | ||
return false; | ||
} else if (!toString().equals(obj.toString())) { | ||
return false; | ||
} | ||
return true; | ||
return other.typeMetadata == null; | ||
} else | ||
return toString().equals(obj.toString()); | ||
} | ||
|
||
private void writeObject(ObjectOutputStream out) throws Exception { | ||
out.writeObject(this.toString()); | ||
} | ||
|
||
private void readObject(ObjectInputStream in) throws Exception { | ||
this.ingestTypes = Sets.newHashSet(); | ||
this.ingestTypes = Sets.newTreeSet(); | ||
this.fieldNames = Sets.newTreeSet(); | ||
this.typeMetadata = Maps.newHashMap(); | ||
this.fromString((String) in.readObject()); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you're going to make these TreeSets then the variable should be migrated to a SortedSet