diff --git a/jena-arq/src/main/java/org/apache/jena/riot/system/PrefixMapStd.java b/jena-arq/src/main/java/org/apache/jena/riot/system/PrefixMapStd.java
index 1ee20cdaa04..b653d2e176b 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/system/PrefixMapStd.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/system/PrefixMapStd.java
@@ -18,15 +18,23 @@
 
 package org.apache.jena.riot.system;
 
-import static org.apache.jena.riot.system.PrefixLib.canonicalPrefix;
-import static org.apache.jena.riot.system.PrefixLib.isSafeLocalPart;
-
-import java.util.Collections ;
-import java.util.Map ;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
 import java.util.Objects;
-import java.util.concurrent.ConcurrentHashMap;
+import java.util.Optional;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.function.Supplier;
 
-import org.apache.jena.atlas.lib.Pair ;
+import org.apache.jena.atlas.lib.Pair;
+import org.apache.jena.atlas.lib.Trie;
+import org.apache.jena.ext.com.google.common.cache.Cache;
+import org.apache.jena.ext.com.google.common.cache.CacheBuilder;
+import org.apache.jena.sparql.graph.PrefixMappingBase;
 
 /**
  * In-memory implementation of a {@link PrefixMap}.
@@ -45,134 +53,243 @@
  * copy is cheaper than repeated reverse lookups would be.
  */
 public class PrefixMapStd extends PrefixMapBase {
-    // Expansion map
-    final Map<String, String> prefixes = new ConcurrentHashMap<>();
-    // Immutable view of prefixes
-    private final Map<String, String> prefixes2 = Collections.unmodifiableMap(prefixes);
+    public static final int DFT_CACHE_SIZE = 1000;
 
-    // Abbreviation map used for common cases.
-    // This keeps the URI->prefix mappings for a computed guess at the answer, before
-    // resorting to a full search. See abbrev(String) below.
-    private final Map<String, String> uriToPrefix = new ConcurrentHashMap<>();
+    private ReadWriteLock rwl = new ReentrantReadWriteLock();
 
-    /**
-     * Creates a new empty prefix mapping
-     */
-    public PrefixMapStd() {}
+    private Map<String, String> prefixToIri = new LinkedHashMap<>();
+    private Map<String, String> prefixToIriView = Collections.unmodifiableMap(prefixToIri);
 
-    /**
-     * Creates a new prefix mapping copied from an existing map
-     * @param prefixMap Prefix Map
-     */
-    public PrefixMapStd(PrefixMap prefixMap) {
-        Objects.requireNonNull(prefixMap);
-        prefixes.putAll(prefixMap.getMapping());
+    /** A trie for longest prefix lookups */
+    private Trie<String> iriToPrefixTrie = new Trie<>();
+
+    /** For exact matches of IRI strings the map is much faster than the trie */
+    private Map<String, String> iriToPrefixMap = new HashMap<>();
+
+    /** A cache for mapping iris to prefixes.
+     * Wrapping with Optional is needed because the Guava Cache does not allow for null values */
+    private Cache<String, Optional<String>> cache;
+
+    /** A generation counter that is incremented on modifications and which is
+     * used to invalidate the internal cache when needed.
+     * If generation and cacheVersion differ then the next prefix lookup will invalidate the cache and
+     * set cacheVersion to generation */
+    private int generation = 0;
+    private int cacheVersion = 0;
+
+    public PrefixMapStd() {
+        this(DFT_CACHE_SIZE);
     }
 
-    @Override
-    public Map<String, String> getMapping() {
-        return prefixes2;
+    /** Copies the prefixes. Does not copy the cache. */
+    public PrefixMapStd(PrefixMap prefixMap) {
+        this(DFT_CACHE_SIZE);
+        putAll(Objects.requireNonNull(prefixMap));
     }
 
-    @Override
-    public String get(String prefix) {
-        Objects.requireNonNull(prefix);
-        prefix = canonicalPrefix(prefix);
-        return prefixes.get(prefix);
+    public PrefixMapStd(long longestMatchCacheSize) {
+        super();
+        this.cache = CacheBuilder.newBuilder().maximumSize(longestMatchCacheSize).build();
     }
 
     @Override
     public void add(String prefix, String iri) {
         Objects.requireNonNull(prefix);
         Objects.requireNonNull(iri);
-        prefix = canonicalPrefix(prefix);
-        String oldURI = prefixes.get(prefix);
-        if ( oldURI != null )
-            uriToPrefix.remove(oldURI);
-        prefixes.put(prefix, iri);
-        uriToPrefix.put(iri.toString(), prefix) ;
+        String canonicalPrefix = PrefixLib.canonicalPrefix(prefix);
+        runWithLock(rwl.writeLock(), () -> {
+            String oldIri = prefixToIri.get(canonicalPrefix);
+            if (!Objects.equals(oldIri, iri)) {
+                if (oldIri != null) {
+                    iriToPrefixTrie.remove(oldIri);
+                    iriToPrefixMap.remove(oldIri);
+                }
+                prefixToIri.put(canonicalPrefix, iri);
+                iriToPrefixTrie.add(iri, canonicalPrefix);
+                iriToPrefixMap.put(iri, canonicalPrefix);
+                ++generation;
+            }
+        });
     }
 
+    /** See notes on reverse mappings in {@link PrefixMappingBase}.
+     * This is a complete implementation.
+     * <p>
+     * Test {@code AbstractTestPrefixMapping.testSecondPrefixDeletedUncoversPreviousMap}.
+     */
     @Override
     public void delete(String prefix) {
         Objects.requireNonNull(prefix);
-        prefix = canonicalPrefix(prefix);
-        prefixes.remove(prefix);
-        // Remove from the abbreviation map.
-        uriToPrefix.values().remove(prefix);
+        String canonicalPrefix = PrefixLib.canonicalPrefix(prefix);
+        runWithLock(rwl.writeLock(), () -> {
+            // Removal returns the previous value or null if there was none
+            String iriForPrefix = prefixToIri.remove(canonicalPrefix);
+            if (iriForPrefix != null) {
+                String prefixForIri = iriToPrefixMap.get(iriForPrefix);
+                if (canonicalPrefix.equals(prefixForIri)) {
+                    iriToPrefixTrie.remove(iriForPrefix);
+                    iriToPrefixMap.remove(iriForPrefix); // the reverse map is keyed by IRI, not by prefix
+                }
+                ++generation;
+            }
+        });
     }
 
     @Override
-    public void clear() {
-        prefixes.clear() ;
+    public Pair<String, String> abbrev(String iriStr) {
+        Objects.requireNonNull(iriStr);
+        return calcWithLock(rwl.readLock(), () -> {
+            Pair<String, String> r = null;
+
+            String prefix = performPrefixLookup(iriStr);
+            String iriForPrefix = prefix != null ? prefixToIri.get(prefix) : null;
+
+            // Post process a found solution
+            if (prefix != null && iriForPrefix != null) {
+                String localName = iriStr.substring(iriForPrefix.length());
+                if (PrefixLib.isSafeLocalPart(localName)) {
+                    r = Pair.create(prefix, localName);
+                }
+            }
+            return r;
+        });
     }
 
     @Override
-    public boolean containsPrefix(String prefix) {
-        Objects.requireNonNull(prefix);
-        prefix = canonicalPrefix(prefix);
-        return prefixes.containsKey(prefix);
+    public String abbreviate(String iriStr) {
+        Objects.requireNonNull(iriStr);
+        String result = null;
+        // Locking is only needed in abbrev
+        Pair<String, String> prefixAndLocalName = abbrev(iriStr);
+        if (prefixAndLocalName != null) {
+            String prefix = prefixAndLocalName.getLeft();
+            String ln = prefixAndLocalName.getRight();
+            // Safe for RDF/XML as well
+            if (strSafeFor(ln, ':')) {
+                result = prefix + ":" + ln;
+            }
+        }
+        return result;
     }
 
     @Override
-    public String abbreviate(String uriStr) {
-        Objects.requireNonNull(uriStr);
-        Pair<String, String> p = abbrev(uriStr);
-        if (p == null)
-            return null;
-        return p.getLeft() + ":" + p.getRight();
+    public String get(String prefix) {
+        Objects.requireNonNull(prefix);
+        return calcWithLock(rwl.readLock(), () -> {
+            String canonicalPrefix = PrefixLib.canonicalPrefix(prefix);
+            return prefixToIri.get(canonicalPrefix);
+        });
     }
 
-    // This is thread safe (does not crash) - it is not thread-consistent (answer
-    // uncertain if the prefix mappings are in flux).
+    /** Returns an unmodifiable and non-synchronized(!) view of the mappings */
     @Override
-    public Pair<String, String> abbrev(String uriStr) {
-        Objects.requireNonNull(uriStr);
-        // Fast path.
-        // Look for a prefix by URI ending "#" or "/"
-        // then look for that as a known prefix.
-        String candidate = getPossibleKey(uriStr);
-        if ( candidate != null ) {
-            String uriForPrefix = uriToPrefix.get(candidate);
-            if ( uriForPrefix != null ) {
-                // Fast track.
-                String ln = uriStr.substring(candidate.length());
-                if ( isSafeLocalPart(ln) )
-                    return Pair.create(uriForPrefix, ln);
-            }
-        }
-        // Not in the uri -> prefix map. Crunch it.
-        return PrefixLib.abbrev(prefixes, uriStr, true);
+    public Map<String, String> getMapping() {
+        return prefixToIriView;
     }
 
-    /**
-     * Takes a guess for the namespace URI string to use in abbreviation.
-     * Finds the part of the IRI string before the last '#' or '/'.
-     *
-     * @param iriString String string
-     * @return String or null
-     */
-    protected static String getPossibleKey(String iriString) {
-        int index = iriString.lastIndexOf('#');
-        if (index > -1)
-            return iriString.substring(0, index + 1);
-        index = iriString.lastIndexOf('/');
-        if (index > -1)
-            return iriString.substring(0, index + 1);
-        // We could add ':' here, it is used as a separator in URNs.
-        // But it is a multiple use character and always present in the scheme name.
-        // This is a fast-track guess so don't try guessing based on ':'.
-        return null;
+    @Override
+    public void clear() {
+        runWithLock(rwl.writeLock(), () -> {
+            if (!prefixToIri.isEmpty()) {
+                prefixToIri.clear();
+                iriToPrefixTrie.clear();
+                iriToPrefixMap.clear();
+                cache.invalidateAll();
+                ++generation;
+            }
+        });
     }
 
     @Override
     public boolean isEmpty() {
-        return prefixes.isEmpty();
+        return calcWithLock(rwl.readLock(), () -> prefixToIri.isEmpty());
     }
 
     @Override
     public int size() {
-        return prefixes.size();
+        return calcWithLock(rwl.readLock(), () -> prefixToIri.size());
+    }
+
+    @Override
+    public boolean containsPrefix(String prefix) {
+        Objects.requireNonNull(prefix);
+        return calcWithLock(rwl.readLock(), () -> {
+            String canonicalPrefix = PrefixLib.canonicalPrefix(prefix);
+            return prefixToIri.containsKey(canonicalPrefix);
+        });
+    }
+
+    private static String getPossibleKey(String iriString) {
+        int n = iriString.length();
+        int i;
+        for (i = n - 1; i >= 0; --i) {
+            char c = iriString.charAt(i);
+            if (c == '#' || c == '/') {
+                // We could add ':' here, it is used as a separator in URNs.
+                // But it is a multiple use character and always present in the scheme name.
+                // This is a fast-track guess so don't try guessing based on ':'.
+                break;
+            }
+        }
+        String result = i >= 0 ? iriString.substring(0, i + 1) : null;
+        return result;
+    }
+
+    private String performPrefixLookup(String iriStr) {
+        String prefix = null;
+        String iriForPrefix = getPossibleKey(iriStr);
+        // Try fast track first - if it produces a hit then
+        // no overhead writing to the cache is needed
+        // The drawback is that we do not necessarily get the longest prefix
+        if (iriForPrefix != null) {
+            prefix = iriToPrefixMap.get(iriForPrefix);
+        }
+
+        // If no solution yet then search for longest prefix
+        if (prefix == null) {
+            prefix = cachedPrefixLookup(iriStr).orElse(null);
+        }
+        return prefix;
+    }
+
+    private Optional<String> cachedPrefixLookup(String iri) {
+        if (cacheVersion != generation) {
+            cache.invalidateAll();
+            cacheVersion = generation;
+        }
+
+        Optional<String> prefix;
+        try {
+            prefix = cache.get(iri, () -> Optional.ofNullable(uncachedPrefixLookup(iri)));
+        } catch (ExecutionException e) {
+            throw new RuntimeException("Unexpected failure during cache lookup", e);
+        }
+        return prefix;
+    }
+
+    private String uncachedPrefixLookup(String iriStr) {
+        String prefix = iriToPrefixTrie.longestMatch(iriStr);
+        return prefix;
+    }
+
+    private static void runWithLock(Lock lock, Runnable runnable) {
+        lock.lock();
+        try {
+            runnable.run();
+        } finally {
+            lock.unlock();
+        }
+    }
+
+    private static <T> T calcWithLock(Lock lock, Supplier<T> supplier) {
+        T result;
+        lock.lock();
+        try {
+            result = supplier.get();
+        } finally {
+            lock.unlock();
+        }
+        return result;
     }
 
 }