Skip to content

Commit

Permalink
GH-1474: Improved PrefixMapStd
Browse files Browse the repository at this point in the history
  • Loading branch information
Aklakan committed Aug 24, 2022
1 parent b78216e commit a422853
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 311 deletions.
265 changes: 132 additions & 133 deletions jena-arq/src/main/java/org/apache/jena/riot/system/PrefixMapStd.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,13 @@
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.function.Supplier;

import org.apache.jena.atlas.lib.Pair;
import org.apache.jena.atlas.lib.Trie;
import org.apache.jena.ext.com.google.common.base.Stopwatch;
import org.apache.jena.ext.com.google.common.cache.Cache;
import org.apache.jena.ext.com.google.common.cache.CacheBuilder;
import org.apache.jena.sparql.graph.PrefixMappingBase;
Expand All @@ -54,6 +56,8 @@ public class PrefixMapStd extends PrefixMapBase {

/** NOTE(review): presumably the default size of the longest-match cache; its use is not visible in this excerpt - confirm. */
public static final int DFT_CACHE_SIZE = 1000;

/** Read/write lock; its writeLock() is handed to the mutating methods and its readLock() to the query methods of this class. */
private ReadWriteLock rwl = new ReentrantReadWriteLock();

/** Forward mapping from prefix to IRI; a LinkedHashMap, so iteration follows insertion order. */
private Map<String, String> prefixToIri = new LinkedHashMap<>();
/** Unmodifiable wrapper around {@link #prefixToIri}; this is the object returned by getMapping(). */
private Map<String, String> prefixToIriView = Collections.unmodifiableMap(prefixToIri);

Expand Down Expand Up @@ -89,73 +93,24 @@ public PrefixMapStd(long longestMatchCacheSize) {
this.cache = CacheBuilder.newBuilder().maximumSize(longestMatchCacheSize).build();
}

/**
 * Fast-track guess at the namespace part of an IRI: everything up to and
 * including the last '#' or '/'.
 * <p>
 * ':' is deliberately not treated as a separator. It separates parts of URNs,
 * but it is a multiple-use character that is always present after the scheme
 * name, so guessing on it is not attempted here.
 *
 * @param iriString the IRI to inspect; must not be null
 * @return the leading part of {@code iriString} ending with its last '#' or '/',
 *         or {@code null} if it contains neither character
 */
protected static String getPossibleKey(String iriString) {
    int lastSeparator = Math.max(iriString.lastIndexOf('#'), iriString.lastIndexOf('/'));
    return lastSeparator < 0 ? null : iriString.substring(0, lastSeparator + 1);
}

/**
 * Finds a prefix whose IRI is a leading part of the given IRI string.
 * <p>
 * The fast track is tried first: the part of the IRI up to its last '#' or '/'
 * is looked up directly in the IRI-to-prefix map. A hit there avoids the
 * overhead of writing to the cache, at the price that the result is not
 * necessarily the longest matching prefix IRI. Only when the fast track
 * yields nothing is the cached longest-match search consulted.
 *
 * @param iriStr the IRI to find a prefix for
 * @return the prefix, or {@code null} if neither lookup produced one
 */
protected String performPrefixLookup(String iriStr) {
    String candidateNamespace = getPossibleKey(iriStr);
    if (candidateNamespace != null) {
        String fastTrackHit = iriToPrefixMap.get(candidateNamespace);
        if (fastTrackHit != null) {
            return fastTrackHit;
        }
    }
    // No fast-track solution: search for the longest prefix
    return cachedPrefixLookup(iriStr).orElse(null);
}

/**
 * Longest-match prefix lookup that goes through the cache.
 * <p>
 * Before the lookup the cache is emptied if it was filled for a different
 * generation of the mappings than the current one.
 *
 * @param iri the IRI to find a prefix for
 * @return the cached or freshly computed lookup result; empty if the
 *         uncached lookup returned {@code null}
 * @throws RuntimeException if loading the value into the cache fails
 */
protected Optional<String> cachedPrefixLookup(String iri) {
    boolean cacheIsStale = cacheVersion != generation;
    if (cacheIsStale) {
        cache.invalidateAll();
        cacheVersion = generation;
    }

    try {
        return cache.get(iri, () -> Optional.ofNullable(uncachedPrefixLookup(iri)));
    } catch (ExecutionException e) {
        throw new RuntimeException("Unexpected failure during cache lookup", e);
    }
}

/**
 * Asks the IRI-to-prefix trie for its longest match on the given IRI,
 * bypassing the cache.
 *
 * @param iriStr the IRI to find a prefix for
 * @return whatever {@code iriToPrefixTrie.longestMatch} returns for the IRI;
 *         callers treat the value as possibly {@code null}
 */
protected String uncachedPrefixLookup(String iriStr) {
    return iriToPrefixTrie.longestMatch(iriStr);
}

/**
 * Maps the given prefix to the given IRI, replacing any IRI the prefix was
 * mapped to before.
 * <p>
 * The update runs under the write lock. If the prefix is already mapped to
 * exactly this IRI nothing is changed and the generation counter is left
 * alone, so cached lookups stay valid.
 *
 * @param prefix the prefix; must not be null (it is canonicalized before use)
 * @param iri the IRI to map the prefix to; must not be null
 * @throws NullPointerException if either argument is null
 */
@Override
public void add(String prefix, String iri) {
    Objects.requireNonNull(prefix);
    Objects.requireNonNull(iri);
    String canonicalPrefix = PrefixLib.canonicalPrefix(prefix);
    runWithLock(rwl.writeLock(), () -> {
        String oldIri = prefixToIri.get(canonicalPrefix);
        if (!Objects.equals(oldIri, iri)) {
            // Drop the reverse entries of the IRI this prefix pointed to before
            if (oldIri != null) {
                iriToPrefixTrie.remove(oldIri);
                iriToPrefixMap.remove(oldIri);
            }
            prefixToIri.put(canonicalPrefix, iri);
            iriToPrefixTrie.add(iri, canonicalPrefix);
            iriToPrefixMap.put(iri, canonicalPrefix);
            // Signals that cached lookup results are out of date
            ++generation;
        }
    });
}

/** See notes on reverse mappings in {@link PrefixMappingBase}.
Expand All @@ -167,40 +122,45 @@ public void add(String prefix, String iri) {
public void delete(String prefix) {
Objects.requireNonNull(prefix);
String canonicalPrefix = PrefixLib.canonicalPrefix(prefix);
String iriForPrefix = prefixToIri.get(canonicalPrefix);
if (iriForPrefix != null) {
prefixToIri.remove(canonicalPrefix);
String prefixForIri = iriToPrefixMap.get(iriForPrefix);
if (canonicalPrefix.equals(prefixForIri)) {
iriToPrefixTrie.remove(iriForPrefix);
iriToPrefixMap.remove(prefixForIri);
runWithLock(rwl.writeLock(), () -> {
// Removal returns the previous value or null if there was none
String iriForPrefix = prefixToIri.remove(canonicalPrefix);
if (iriForPrefix != null) {
String prefixForIri = iriToPrefixMap.get(iriForPrefix);
if (canonicalPrefix.equals(prefixForIri)) {
iriToPrefixTrie.remove(iriForPrefix);
iriToPrefixMap.remove(prefixForIri);
}
++generation;
}
}
++generation;
});
}

@Override
public Pair<String, String> abbrev(String iriStr) {
Objects.requireNonNull(iriStr);
Pair<String, String> result = null;
return calcWithLock(rwl.readLock(), () -> {
Pair<String, String> r = null;

String prefix = performPrefixLookup(iriStr);
String iriForPrefix = prefix != null ? prefixToIri.get(prefix) : null;
String prefix = performPrefixLookup(iriStr);
String iriForPrefix = prefix != null ? prefixToIri.get(prefix) : null;

// Post process a found solution
if (prefix != null && iriForPrefix != null) {
String localName = iriStr.substring(iriForPrefix.length());
if (PrefixLib.isSafeLocalPart(localName)) {
result = Pair.create(prefix, localName);
// Post process a found solution
if (prefix != null && iriForPrefix != null) {
String localName = iriStr.substring(iriForPrefix.length());
if (PrefixLib.isSafeLocalPart(localName)) {
r = Pair.create(prefix, localName);
}
}
}
return result;
return r;
});
}

@Override
public String abbreviate(String iriStr) {
Objects.requireNonNull(iriStr);
String result = null;
// Locking is only needed in abbrev
Pair<String, String> prefixAndLocalName = abbrev(iriStr);
if (prefixAndLocalName != null) {
String prefix = prefixAndLocalName.getLeft();
Expand All @@ -216,81 +176,120 @@ public String abbreviate(String iriStr) {
/**
 * Looks up the IRI a prefix is mapped to. The lookup runs under the read lock.
 *
 * @param prefix the prefix; must not be null (it is canonicalized before use)
 * @return the IRI mapped to the prefix, or {@code null} if there is no mapping
 * @throws NullPointerException if {@code prefix} is null
 */
@Override
public String get(String prefix) {
    Objects.requireNonNull(prefix);
    return calcWithLock(rwl.readLock(), () -> {
        String canonicalPrefix = PrefixLib.canonicalPrefix(prefix);
        return prefixToIri.get(canonicalPrefix);
    });
}

/**
 * Returns an unmodifiable and non-synchronized(!) view of the mappings.
 * <p>
 * The returned object is the pre-built unmodifiable wrapper around the
 * prefix-to-IRI map, not a copy. This method takes no lock, so reads made
 * through the view are not protected by the read/write lock.
 *
 * @return the unmodifiable prefix-to-IRI view
 */
@Override
public Map<String, String> getMapping() {
return prefixToIriView;
}

/**
 * Removes all mappings. Runs under the write lock.
 * <p>
 * If there are no mappings nothing is touched: the cache is kept and the
 * generation counter is not bumped.
 */
@Override
public void clear() {
    runWithLock(rwl.writeLock(), () -> {
        if (!prefixToIri.isEmpty()) {
            prefixToIri.clear();
            iriToPrefixTrie.clear();
            iriToPrefixMap.clear();
            cache.invalidateAll();
            ++generation;
        }
    });
}

/**
 * Tells whether there are no mappings. The check runs under the read lock.
 *
 * @return {@code true} if the prefix-to-IRI map is empty
 */
@Override
public boolean isEmpty() {
    return calcWithLock(rwl.readLock(), () -> prefixToIri.isEmpty());
}

/**
 * Returns the number of mappings. The count is read under the read lock.
 *
 * @return the size of the prefix-to-IRI map
 */
@Override
public int size() {
    return calcWithLock(rwl.readLock(), () -> prefixToIri.size());
}

/**
 * Tells whether a prefix has a mapping. The check runs under the read lock.
 *
 * @param prefix the prefix; must not be null (it is canonicalized before use)
 * @return {@code true} if the prefix-to-IRI map has an entry for the prefix
 * @throws NullPointerException if {@code prefix} is null
 */
@Override
public boolean containsPrefix(String prefix) {
    Objects.requireNonNull(prefix);
    return calcWithLock(rwl.readLock(), () -> {
        String canonicalPrefix = PrefixLib.canonicalPrefix(prefix);
        return prefixToIri.containsKey(canonicalPrefix);
    });
}

public static void main(String[] args) throws Exception {

String[][] baseIris = new String[2][];
baseIris[0] = new String[]{"http://example.org/", "/"};
baseIris[1] = new String[]{ "urn:foo:bar:", ":"};

for (int runId = 0; runId < 5; ++ runId) {
for (int baseId = 0; baseId < baseIris.length; ++baseId) {
String[] e = baseIris[baseId];
String baseIriStr = e[0];
String separator = e[1];

for(int approachId = 0; approachId < 2; ++approachId) {
// Select the prefix map implementation: 0 -> improved, 1 -> original
PrefixMap pm = approachId == 0 ? new PrefixMapStdOrig() : new PrefixMapStd();

// Initialize some prefixes
for (int i = 0; i < 2000; ++i) {
pm.add("ns" + i, baseIriStr + i + separator);
}

// Lookup with the same IRI (always cache hit)
Stopwatch sw = Stopwatch.createStarted();
String staticIri = baseIriStr + "1" + separator + "foobar";
for (int i = 0; i < 1000000; ++i) {
String abbr = pm.abbreviate(staticIri);
}
System.out.println(String.format("Run %d with base <%s> and separator %s using approach %d: Static IRI lookups took %.3f seconds", runId, baseIriStr, separator, approachId, sw.elapsed(TimeUnit.MILLISECONDS) * 0.001));

// Lookup with different IRIs
Stopwatch sw2 = Stopwatch.createStarted();
for (int i = 0; i < 1000000; ++i) {
String iriStr = baseIriStr + (i % 10000) + separator + "foobar";
String abbr = pm.abbreviate(iriStr);
}
System.out.println(String.format("Run %d with base <%s> and separator %s using approach %d: Dynamic IRI lookups took %.3f seconds", runId, baseIriStr, separator, approachId, sw2.elapsed(TimeUnit.MILLISECONDS) * 0.001));

System.out.println();
}
/**
 * Fast-track guess at the namespace part of an IRI: the leading part that
 * ends with the last '#' or '/'.
 * <p>
 * ':' is not used as a split point. It acts as a separator in URNs, but it
 * is a multiple-use character and always present in the scheme name, so this
 * quick guess does not try to split on it.
 *
 * @param iriString the IRI to inspect; must not be null
 * @return the part of {@code iriString} up to and including its last '#' or
 *         '/', or {@code null} if neither character occurs
 */
private static String getPossibleKey(String iriString) {
    int end = iriString.length();
    // Walk backwards; 'end' is one past the character under inspection
    while (end > 0) {
        char current = iriString.charAt(end - 1);
        if (current == '/' || current == '#') {
            return iriString.substring(0, end);
        }
        --end;
    }
    return null;
}

/**
 * Finds a prefix whose IRI is a leading part of the given IRI string.
 * <p>
 * First the fast track: the IRI cut after its last '#' or '/' is looked up
 * directly in the IRI-to-prefix map. A hit needs no write to the cache, but
 * it is not necessarily the longest matching prefix IRI. Without a hit the
 * cached longest-match search decides.
 *
 * @param iriStr the IRI to find a prefix for
 * @return the prefix, or {@code null} if neither lookup produced one
 */
private String performPrefixLookup(String iriStr) {
    String guessedNamespace = getPossibleKey(iriStr);
    String quickHit = guessedNamespace == null ? null : iriToPrefixMap.get(guessedNamespace);
    if (quickHit != null) {
        return quickHit;
    }
    // Fall back to the search for the longest prefix
    return cachedPrefixLookup(iriStr).orElse(null);
}

/**
 * Longest-match prefix lookup that goes through the cache.
 * <p>
 * If the cache was filled for another generation of the mappings than the
 * current one, it is emptied first and marked as belonging to the current
 * generation.
 * <p>
 * NOTE(review): this method is reached from abbrev(), which holds only the
 * read lock, yet it writes {@code cacheVersion} - confirm that concurrent
 * readers doing this at the same time is acceptable.
 *
 * @param iri the IRI to find a prefix for
 * @return the cached or freshly computed lookup result; empty if the
 *         uncached lookup returned {@code null}
 * @throws RuntimeException if loading the value into the cache fails
 */
private Optional<String> cachedPrefixLookup(String iri) {
    boolean outdated = cacheVersion != generation;
    if (outdated) {
        cache.invalidateAll();
        cacheVersion = generation;
    }

    try {
        return cache.get(iri, () -> Optional.ofNullable(uncachedPrefixLookup(iri)));
    } catch (ExecutionException e) {
        throw new RuntimeException("Unexpected failure during cache lookup", e);
    }
}

/**
 * Asks the IRI-to-prefix trie for its longest match on the given IRI,
 * without consulting the cache.
 *
 * @param iriStr the IRI to find a prefix for
 * @return whatever {@code iriToPrefixTrie.longestMatch} returns for the IRI;
 *         the caller treats the value as possibly {@code null}
 */
private String uncachedPrefixLookup(String iriStr) {
    return iriToPrefixTrie.longestMatch(iriStr);
}

/**
 * Runs an action while holding the given lock.
 * <p>
 * The lock is acquired before the action starts and released in a
 * {@code finally} block, so it is released even if the action throws.
 *
 * @param lock the lock to hold for the duration of the action
 * @param runnable the action to run
 */
private static void runWithLock(Lock lock, Runnable runnable) {
lock.lock();
try {
runnable.run();
} finally {
// Always release, also when the action failed
lock.unlock();
}
}

/**
 * Computes a value while holding the given lock.
 * <p>
 * The lock is acquired before the supplier is called and released in a
 * {@code finally} block, so it is released even if the supplier throws.
 *
 * @param <T> the type of the computed value
 * @param lock the lock to hold while the supplier runs
 * @param supplier the computation to run
 * @return the value produced by the supplier
 */
private static <T> T calcWithLock(Lock lock, Supplier<T> supplier) {
    lock.lock();
    try {
        return supplier.get();
    } finally {
        // Always release, also when the computation failed
        lock.unlock();
    }
}
}
Loading

0 comments on commit a422853

Please sign in to comment.