Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved implementation of PrefixMapStd #1475

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
320 changes: 233 additions & 87 deletions jena-arq/src/main/java/org/apache/jena/riot/system/PrefixMapStd.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,23 @@

package org.apache.jena.riot.system;

import static org.apache.jena.riot.system.PrefixLib.canonicalPrefix;
import static org.apache.jena.riot.system.PrefixLib.isSafeLocalPart;

import java.util.Collections ;
import java.util.Map ;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.function.Supplier;

import org.apache.jena.atlas.lib.Pair ;
import org.apache.jena.atlas.lib.Pair;
import org.apache.jena.atlas.lib.Trie;
import org.apache.jena.ext.com.google.common.cache.Cache;
import org.apache.jena.ext.com.google.common.cache.CacheBuilder;
import org.apache.jena.sparql.graph.PrefixMappingBase;

/**
* In-memory implementation of a {@link PrefixMap}.
Expand All @@ -45,105 +53,193 @@
* copy is cheaper than repeated reverse lookups would be.
*/
public class PrefixMapStd extends PrefixMapBase {
// Expansion map
final Map<String, String> prefixes = new ConcurrentHashMap<>();

// Immutable view of prefixes
private final Map<String, String> prefixes2 = Collections.unmodifiableMap(prefixes);
public static final int DFT_CACHE_SIZE = 1000;

// Abbreviation map used for common cases.
// This keeps the URI->prefix mappings for a computed guess at the answer, before
// resorting to a full search. See abbrev(String) below.
private final Map<String, String> uriToPrefix = new ConcurrentHashMap<>();
private ReadWriteLock rwl = new ReentrantReadWriteLock();

/**
* Creates a new empty prefix mapping
*/
public PrefixMapStd() {}
private Map<String, String> prefixToIri;
private Map<String, String> prefixToIriView;

/**
* Creates a new prefix mapping copied from an existing map
* @param prefixMap Prefix Map
*/
/** A trie for longest prefix lookups */
private Trie<String> iriToPrefixTrie = new Trie<>();

/** For exact matches of IRI strings the map is much faster than the trie */
private Map<String, String> iriToPrefixMap = new HashMap<>();

/** A cache for mapping iris to prefixes.
* Wrapping with Optional is needed because the Guava Cache does not allow for null values */
private Cache<String, Optional<String>> cache;

/** A generation counter that is incremented on modifications and which is
* used to invalidate the internal cache when needed.
* If generation and cacheVersion differ then the next prefix lookup will invalidate the cache and
* set cacheVersion to generation. */
private int generation = 0;
private int cacheVersion = 0;

/**
 * Creates a prefix map by delegating to the sized constructor with
 * {@link #DFT_CACHE_SIZE} as the prefix lookup cache size.
 */
public PrefixMapStd() {
this(DFT_CACHE_SIZE);
}

/** Copies the prefixes. Does not copy the cache. */
public PrefixMapStd(PrefixMap prefixMap) {
Objects.requireNonNull(prefixMap);
prefixes.putAll(prefixMap.getMapping());
this(DFT_CACHE_SIZE);
putAll(prefixMap);
}

@Override
public Map<String, String> getMapping() {
return prefixes2;
/**
 * Creates a prefix map that stores its prefixes in a new {@link ConcurrentHashMap}.
 *
 * @param prefixLookupCacheSize passed on unchanged as the cache size of the two-argument constructor
 */
public PrefixMapStd(long prefixLookupCacheSize) {
this(new ConcurrentHashMap<>(), prefixLookupCacheSize);
}

@Override
public String get(String prefix) {
Objects.requireNonNull(prefix);
prefix = canonicalPrefix(prefix);
return prefixes.get(prefix);
/**
* Create a PrefixMapStd instance using the the specified prefix-to-iri map implementation and cache size.
* @param prefixToIri An empty map into which to store prefixes. Should not be changed externally.
* @param prefixLookupCacheSize The cache size for prefix lookups.
*/
public PrefixMapStd(Map<String, String> prefixToIri, long prefixLookupCacheSize) {
super();
Objects.requireNonNull(prefixToIri);
if (!prefixToIri.isEmpty()) {
// Best effort check; the caller may still perform concurrent modifications to the supplied map
throw new IllegalArgumentException("PrefixToIri map must be initially empty");
}
this.prefixToIri = prefixToIri;
this.prefixToIriView = Collections.unmodifiableMap(prefixToIri);
this.cache = CacheBuilder.newBuilder().maximumSize(prefixLookupCacheSize).build();
}

@Override
public void add(String prefix, String iri) {
Objects.requireNonNull(prefix);
Objects.requireNonNull(iri);
prefix = canonicalPrefix(prefix);
String oldURI = prefixes.get(prefix);
if ( oldURI != null )
uriToPrefix.remove(oldURI);
prefixes.put(prefix, iri);
uriToPrefix.put(iri.toString(), prefix) ;
String canonicalPrefix = PrefixLib.canonicalPrefix(prefix);
// Update the forward map and both reverse structures atomically under the write lock.
execute(rwl.writeLock(), () -> {
    String oldIri = prefixToIri.get(canonicalPrefix);
    // Nothing to do (and no cache invalidation needed) when the mapping is unchanged
    if (!Objects.equals(oldIri, iri)) {
        // Drop the reverse entries of the replaced IRI only when they still point at this
        // prefix. Another prefix may have been bound to the same IRI later and now own the
        // reverse entry; removing it would break abbreviation for that prefix.
        // This mirrors the ownership check performed in delete.
        if (oldIri != null && canonicalPrefix.equals(iriToPrefixMap.get(oldIri))) {
            iriToPrefixTrie.remove(oldIri);
            iriToPrefixMap.remove(oldIri);
        }
        prefixToIri.put(canonicalPrefix, iri);
        iriToPrefixTrie.add(iri, canonicalPrefix);
        iriToPrefixMap.put(iri, canonicalPrefix);
        // Signals the next lookup to invalidate the cache
        ++generation;
    }
});
}

/** See notes on reverse mappings in {@link PrefixMappingBase}.
* This is a complete implementation.
* <p>
* Test {@code AbstractTestPrefixMapping.testSecondPrefixDeletedUncoversPreviousMap}.
*/
@Override
public void delete(String prefix) {
Objects.requireNonNull(prefix);
prefix = canonicalPrefix(prefix);
prefixes.remove(prefix);
// Remove from the abbreviation map.
uriToPrefix.values().remove(prefix);
String canonicalPrefix = PrefixLib.canonicalPrefix(prefix);
// Remove the forward mapping and, if this prefix owns them, the reverse entries,
// all under the write lock.
execute(rwl.writeLock(), () -> {
    // Removal returns the previous value or null if there was none
    String iriForPrefix = prefixToIri.remove(canonicalPrefix);
    if (iriForPrefix != null) {
        String prefixForIri = iriToPrefixMap.get(iriForPrefix);
        // Only clean up the reverse structures when they refer to the deleted prefix
        if (canonicalPrefix.equals(prefixForIri)) {
            iriToPrefixTrie.remove(iriForPrefix);
            // iriToPrefixMap is keyed by IRI: remove by the IRI, not by the prefix,
            // otherwise the entry stays behind and later lookups resolve a stale prefix.
            iriToPrefixMap.remove(iriForPrefix);
        }
        // Signals the next lookup to invalidate the cache
        ++generation;
    }
});
}

@Override
public void clear() {
prefixes.clear() ;
public Pair<String, String> abbrev(String iriStr) {
    // Splits iriStr into (prefix, local name) using the registered prefixes.
    // Returns null when no prefix applies or the remaining local part is not safe.
    // The whole lookup runs under the read lock.
    Objects.requireNonNull(iriStr);
    return calculate(rwl.readLock(), () -> {
        String prefix = performPrefixLookup(iriStr);
        if (prefix == null) {
            return null;
        }
        String iriForPrefix = prefixToIri.get(prefix);
        // The reverse lookup structures may name a prefix whose current namespace is not a
        // leading part of iriStr. Taking the substring blindly would then yield a wrong
        // local name or throw StringIndexOutOfBoundsException, so treat it as "no match".
        if (iriForPrefix == null || !iriStr.startsWith(iriForPrefix)) {
            return null;
        }
        String localName = iriStr.substring(iriForPrefix.length());
        if (!PrefixLib.isSafeLocalPart(localName)) {
            return null;
        }
        return Pair.create(prefix, localName);
    });
}

@Override
public boolean containsPrefix(String prefix) {
public String abbreviate(String iriStr) {
    // Produces "prefix:localName" for iriStr, or null when abbrev finds no split
    // or the local name fails the strSafeFor check.
    Objects.requireNonNull(iriStr);
    // Locking is only needed in abbrev
    Pair<String, String> split = abbrev(iriStr);
    if (split == null) {
        return null;
    }
    String prefixPart = split.getLeft();
    String localPart = split.getRight();
    // Safe for RDF/XML as well
    if (!strSafeFor(localPart, ':')) {
        return null;
    }
    return prefixPart + ":" + localPart;
}

@Override
public String get(String prefix) {
Objects.requireNonNull(prefix);
prefix = canonicalPrefix(prefix);
return prefixes.containsKey(prefix);
return calculate(rwl.readLock(), () -> {
String canonicalPrefix = PrefixLib.canonicalPrefix(prefix);
return prefixToIri.get(canonicalPrefix);
});
}

/**
 * Returns an unmodifiable and non-synchronized(!) view of the mappings.
 * <p>
 * The returned object is the {@code prefixToIriView} field, i.e. the
 * {@link Collections#unmodifiableMap(Map)} wrapper created in the constructor.
 * No lock is acquired here.
 *
 * @return the read-only view of the prefix-to-IRI map
 */
@Override
public Map<String, String> getMapping() {
return prefixToIriView;
}

@Override
public String abbreviate(String uriStr) {
Objects.requireNonNull(uriStr);
Pair<String, String> p = abbrev(uriStr);
if (p == null)
return null;
return p.getLeft() + ":" + p.getRight();
public Map<String, String> getMappingCopy() {
    // Snapshot of the prefix-to-IRI map, copied with Map.copyOf while holding the read lock
    return calculate(rwl.readLock(), () -> Map.copyOf(prefixToIri));
}

// This is thread safe (does not crash) - it is not thread-consistent (answer
// uncertain if the prefix mappings are in flux).
@Override
public Pair<String, String> abbrev(String uriStr) {
Objects.requireNonNull(uriStr);
// Fast path.
// Look for a prefix by URI ending "#" or "/"
// then look for that as a known prefix.
String candidate = getPossibleKey(uriStr);
if ( candidate != null ) {
String uriForPrefix = uriToPrefix.get(candidate);
if ( uriForPrefix != null ) {
// Fast track.
String ln = uriStr.substring(candidate.length());
if ( isSafeLocalPart(ln) )
return Pair.create(uriForPrefix, ln);
public void clear() {
execute(rwl.writeLock(), () -> {
if (!prefixToIri.isEmpty()) {
prefixToIri.clear();
iriToPrefixTrie.clear();
iriToPrefixMap.clear();
cache.invalidateAll();
++generation;
}
}
// Not in the uri -> prefix map. Crunch it.
return PrefixLib.abbrev(prefixes, uriStr, true);
});
}

/**
 * Reports, under the read lock, whether the prefix-to-IRI map has no entries.
 *
 * @return true if no prefix is registered
 */
@Override
public boolean isEmpty() {
    return calculate(rwl.readLock(), prefixToIri::isEmpty);
}

/**
 * Returns, under the read lock, the number of entries in the prefix-to-IRI map.
 *
 * @return the number of registered prefixes
 */
@Override
public int size() {
    return calculate(rwl.readLock(), prefixToIri::size);
}

/**
 * Tests whether the canonical form of the given prefix is a key of the prefix-to-IRI map.
 * The map is consulted while holding the read lock.
 *
 * @param prefix the prefix to look for; must not be null
 * @return true if the canonicalized prefix is registered
 * @throws NullPointerException if {@code prefix} is null
 */
@Override
public boolean containsPrefix(String prefix) {
    Objects.requireNonNull(prefix);
    String key = PrefixLib.canonicalPrefix(prefix);
    return calculate(rwl.readLock(), () -> prefixToIri.containsKey(key));
}

/**
Expand All @@ -154,25 +250,75 @@ public Pair<String, String> abbrev(String uriStr) {
* @return String or null
*/
protected static String getPossibleKey(String iriString) {
int index = iriString.lastIndexOf('#');
if (index > -1)
return iriString.substring(0, index + 1);
index = iriString.lastIndexOf('/');
if (index > -1)
return iriString.substring(0, index + 1);
// We could add ':' here, it is used as a separator in URNs.
// But it is a multiple use character and always present in the scheme name.
// This is a fast-track guess so don't try guessing based on ':'.
return null;
int n = iriString.length();
int i;
for (i = n - 1; i >= 0; --i) {
char c = iriString.charAt(i);
if (c == '#' || c == '/') {
// We could add ':' here, it is used as a separator in URNs.
// But it is a multiple use character and always present in the scheme name.
// This is a fast-track guess so don't try guessing based on ':'.
break;
}
}
String result = i >= 0 ? iriString.substring(0, i + 1) : null;
return result;
}

@Override
public boolean isEmpty() {
return prefixes.isEmpty();
/**
 * Finds a prefix for the given IRI string.
 * <p>
 * First tries the exact-match map with the candidate namespace produced by
 * {@link #getPossibleKey(String)}; a hit there avoids any write to the cache, at the cost
 * of not necessarily yielding the longest matching prefix. Otherwise falls back to the
 * cached longest-match lookup.
 *
 * @param iriStr the IRI to find a prefix for
 * @return the prefix, or null if none was found
 */
private String performPrefixLookup(String iriStr) {
    String candidateNamespace = getPossibleKey(iriStr);
    if (candidateNamespace != null) {
        // Fast track: exact match on the guessed namespace
        String fastTrackPrefix = iriToPrefixMap.get(candidateNamespace);
        if (fastTrackPrefix != null) {
            return fastTrackPrefix;
        }
    }
    // No fast-track solution: search for the longest prefix
    return cachedPrefixLookup(iriStr).orElse(null);
}

@Override
public int size() {
return prefixes.size();
/**
 * Looks up the prefix for an IRI through the cache, computing it with
 * {@link #uncachedPrefixLookup(String)} on a miss. Misses are cached as empty Optionals.
 * <p>
 * If {@code cacheVersion} differs from {@code generation}, the cache is invalidated and
 * {@code cacheVersion} is set to {@code generation} before the lookup.
 * NOTE(review): abbrev reaches this method while holding only the read lock, yet
 * cacheVersion is written here - confirm that concurrent readers doing so is acceptable.
 *
 * @param iri the IRI to find a prefix for
 * @return the prefix wrapped in an Optional; empty if there is none
 * @throws RuntimeException wrapping an ExecutionException raised by the cache loader
 */
private Optional<String> cachedPrefixLookup(String iri) {
    boolean cacheIsStale = cacheVersion != generation;
    if (cacheIsStale) {
        cache.invalidateAll();
        cacheVersion = generation;
    }

    try {
        return cache.get(iri, () -> Optional.ofNullable(uncachedPrefixLookup(iri)));
    } catch (ExecutionException e) {
        throw new RuntimeException("Unexpected failure during cache lookup", e);
    }
}

/**
 * Asks the trie directly for its longest match on the given IRI string, bypassing the cache.
 *
 * @param iriStr the IRI to find a prefix for
 * @return whatever {@code iriToPrefixTrie.longestMatch} returns for {@code iriStr}
 */
private String uncachedPrefixLookup(String iriStr) {
    return iriToPrefixTrie.longestMatch(iriStr);
}

/**
 * Runs an action while holding the given lock.
 * The lock is released in a finally block, so it is freed even if the action throws.
 *
 * @param lock the lock to acquire for the duration of the action
 * @param runnable the action to run
 */
private static void execute(Lock lock, Runnable runnable) {
lock.lock();
try {
runnable.run();
} finally {
lock.unlock();
}
}

/**
 * Evaluates a supplier while holding the given lock and hands back its result.
 * The lock is released in a finally block, so it is freed even if the supplier throws.
 *
 * @param lock the lock to acquire for the duration of the computation
 * @param supplier the computation to evaluate
 * @return the value produced by the supplier
 */
private static <T> T calculate(Lock lock, Supplier<T> supplier) {
    lock.lock();
    try {
        return supplier.get();
    } finally {
        lock.unlock();
    }
}
}