diff --git a/jena-arq/src/main/java/org/apache/jena/riot/system/PrefixMapStd.java b/jena-arq/src/main/java/org/apache/jena/riot/system/PrefixMapStd.java index eaff163db12..b653d2e176b 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/system/PrefixMapStd.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/system/PrefixMapStd.java @@ -25,11 +25,13 @@ import java.util.Objects; import java.util.Optional; import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.function.Supplier; import org.apache.jena.atlas.lib.Pair; import org.apache.jena.atlas.lib.Trie; -import org.apache.jena.ext.com.google.common.base.Stopwatch; import org.apache.jena.ext.com.google.common.cache.Cache; import org.apache.jena.ext.com.google.common.cache.CacheBuilder; import org.apache.jena.sparql.graph.PrefixMappingBase; @@ -54,6 +56,8 @@ public class PrefixMapStd extends PrefixMapBase { public static final int DFT_CACHE_SIZE = 1000; + private ReadWriteLock rwl = new ReentrantReadWriteLock(); + private Map prefixToIri = new LinkedHashMap<>(); private Map prefixToIriView = Collections.unmodifiableMap(prefixToIri); @@ -89,73 +93,24 @@ public PrefixMapStd(long longestMatchCacheSize) { this.cache = CacheBuilder.newBuilder().maximumSize(longestMatchCacheSize).build(); } - protected static String getPossibleKey(String iriString) { - int n = iriString.length(); - int i; - for (i = n - 1; i >= 0; --i) { - char c = iriString.charAt(i); - if (c == '#' || c == '/') { - // We could add ':' here, it is used as a separator in URNs. - // But it is a multiple use character and always present in the scheme name. - // This is a fast-track guess so don't try guessing based on ':'. - break; - } - } - String result = i >= 0 ? 
iriString.substring(0, i + 1) : null; - return result; - } - - protected String performPrefixLookup(String iriStr) { - String prefix = null; - String iriForPrefix = getPossibleKey(iriStr); - // Try fast track first - if it produces a hit then - // no overhead writing to the cache is needed - // The drawback is that we do not necessarily get the longest prefix - if (iriForPrefix != null) { - prefix = iriToPrefixMap.get(iriForPrefix); - } - - // If no solution yet then search for longest prefix - if (prefix == null) { - prefix = cachedPrefixLookup(iriStr).orElse(null); - } - return prefix; - } - - protected Optional cachedPrefixLookup(String iri) { - if (cacheVersion != generation) { - cache.invalidateAll(); - cacheVersion = generation; - } - - Optional prefix; - try { - prefix = cache.get(iri, () -> Optional.ofNullable(uncachedPrefixLookup(iri))); - } catch (ExecutionException e) { - throw new RuntimeException("Unexpected failure during cache lookup", e); - } - return prefix; - } - - protected String uncachedPrefixLookup(String iriStr) { - String prefix = iriToPrefixTrie.longestMatch(iriStr); - return prefix; - } - @Override public void add(String prefix, String iri) { Objects.requireNonNull(prefix); Objects.requireNonNull(iri); String canonicalPrefix = PrefixLib.canonicalPrefix(prefix); - String oldIri = prefixToIri.get(canonicalPrefix); - if (oldIri != null) { - iriToPrefixTrie.remove(oldIri); - iriToPrefixMap.remove(oldIri); - } - prefixToIri.put(canonicalPrefix, iri); - iriToPrefixTrie.add(iri, canonicalPrefix); - iriToPrefixMap.put(iri, canonicalPrefix); - ++generation; + runWithLock(rwl.writeLock(), () -> { + String oldIri = prefixToIri.get(canonicalPrefix); + if (!Objects.equals(oldIri, iri)) { + if (oldIri != null) { + iriToPrefixTrie.remove(oldIri); + iriToPrefixMap.remove(oldIri); + } + prefixToIri.put(canonicalPrefix, iri); + iriToPrefixTrie.add(iri, canonicalPrefix); + iriToPrefixMap.put(iri, canonicalPrefix); + ++generation; + } + }); } /** See notes on 
reverse mappings in {@link PrefixMappingBase}. @@ -167,40 +122,45 @@ public void add(String prefix, String iri) { public void delete(String prefix) { Objects.requireNonNull(prefix); String canonicalPrefix = PrefixLib.canonicalPrefix(prefix); - String iriForPrefix = prefixToIri.get(canonicalPrefix); - if (iriForPrefix != null) { - prefixToIri.remove(canonicalPrefix); - String prefixForIri = iriToPrefixMap.get(iriForPrefix); - if (canonicalPrefix.equals(prefixForIri)) { - iriToPrefixTrie.remove(iriForPrefix); - iriToPrefixMap.remove(prefixForIri); + runWithLock(rwl.writeLock(), () -> { + // Removal returns the previous value or null if there was none + String iriForPrefix = prefixToIri.remove(canonicalPrefix); + if (iriForPrefix != null) { + String prefixForIri = iriToPrefixMap.get(iriForPrefix); + if (canonicalPrefix.equals(prefixForIri)) { + iriToPrefixTrie.remove(iriForPrefix); + iriToPrefixMap.remove(iriForPrefix); // reverse map is keyed by IRI, so remove by IRI (not by prefix) + } + ++generation; } - } - ++generation; + }); } @Override public Pair abbrev(String iriStr) { Objects.requireNonNull(iriStr); - Pair result = null; + return calcWithLock(rwl.readLock(), () -> { + Pair r = null; - String prefix = performPrefixLookup(iriStr); - String iriForPrefix = prefix != null ? 
prefixToIri.get(prefix) : null; - // Post process a found solution - if (prefix != null && iriForPrefix != null) { - String localName = iriStr.substring(iriForPrefix.length()); - if (PrefixLib.isSafeLocalPart(localName)) { - result = Pair.create(prefix, localName); + // Post process a found solution + if (prefix != null && iriForPrefix != null) { + String localName = iriStr.substring(iriForPrefix.length()); + if (PrefixLib.isSafeLocalPart(localName)) { + r = Pair.create(prefix, localName); + } } - } - return result; + return r; + }); } @Override public String abbreviate(String iriStr) { Objects.requireNonNull(iriStr); String result = null; + // Locking is only needed in abbrev Pair prefixAndLocalName = abbrev(iriStr); if (prefixAndLocalName != null) { String prefix = prefixAndLocalName.getLeft(); @@ -216,10 +176,13 @@ public String abbreviate(String iriStr) { @Override public String get(String prefix) { Objects.requireNonNull(prefix); - String canonicalPrefix = PrefixLib.canonicalPrefix(prefix); - return prefixToIri.get(canonicalPrefix); + return calcWithLock(rwl.readLock(), () -> { + String canonicalPrefix = PrefixLib.canonicalPrefix(prefix); + return prefixToIri.get(canonicalPrefix); + }); } + /** Returns an unmodifiable and non-synchronized(!) 
view of the mappings */ @Override public Map getMapping() { return prefixToIriView; @@ -227,70 +190,106 @@ public Map getMapping() { @Override public void clear() { - cache.invalidateAll(); - prefixToIri.clear(); - iriToPrefixTrie.clear(); - iriToPrefixMap.clear(); - ++generation; + runWithLock(rwl.writeLock(), () -> { + if (!prefixToIri.isEmpty()) { + prefixToIri.clear(); + iriToPrefixTrie.clear(); + iriToPrefixMap.clear(); + cache.invalidateAll(); + ++generation; + } + }); } @Override public boolean isEmpty() { - return prefixToIri.isEmpty(); + return calcWithLock(rwl.readLock(), () -> prefixToIri.isEmpty()); } @Override public int size() { - return prefixToIri.size(); + return calcWithLock(rwl.readLock(), () -> prefixToIri.size()); } @Override public boolean containsPrefix(String prefix) { Objects.requireNonNull(prefix); - String canonicalPrefix = PrefixLib.canonicalPrefix(prefix); - return prefixToIri.containsKey(canonicalPrefix); + return calcWithLock(rwl.readLock(), () -> { + String canonicalPrefix = PrefixLib.canonicalPrefix(prefix); + return prefixToIri.containsKey(canonicalPrefix); + }); } - public static void main(String[] args) throws Exception { - - String[][] baseIris = new String[2][]; - baseIris[0] = new String[]{"http://example.org/", "/"}; - baseIris[1] = new String[]{ "urn:foo:bar:", ":"}; - - for (int runId = 0; runId < 5; ++ runId) { - for (int baseId = 0; baseId < baseIris.length; ++baseId) { - String[] e = baseIris[baseId]; - String baseIriStr = e[0]; - String separator = e[1]; - - for(int approachId = 0; approachId < 2; ++approachId) { - // Select the prefix map implementation: 0 -> improved, 1 -> original - PrefixMap pm = approachId == 0 ? 
new PrefixMapStdOrig() : new PrefixMapStd(); - - // Initialize some prefixes - for (int i = 0; i < 2000; ++i) { - pm.add("ns" + i, baseIriStr + i + separator); - } - - // Lookup with the same IRI (always cache hit) - Stopwatch sw = Stopwatch.createStarted(); - String staticIri = baseIriStr + "1" + separator + "foobar"; - for (int i = 0; i < 1000000; ++i) { - String abbr = pm.abbreviate(staticIri); - } - System.out.println(String.format("Run %d with base <%s> and separator %s using approach %d: Static IRI lookups took %.3f seconds", runId, baseIriStr, separator, approachId, sw.elapsed(TimeUnit.MILLISECONDS) * 0.001)); - - // Lookup with different IRIs - Stopwatch sw2 = Stopwatch.createStarted(); - for (int i = 0; i < 1000000; ++i) { - String iriStr = baseIriStr + (i % 10000) + separator + "foobar"; - String abbr = pm.abbreviate(iriStr); - } - System.out.println(String.format("Run %d with base <%s> and separator %s using approach %d: Dynamic IRI lookups took %.3f seconds", runId, baseIriStr, separator, approachId, sw2.elapsed(TimeUnit.MILLISECONDS) * 0.001)); - - System.out.println(); - } + private static String getPossibleKey(String iriString) { + int n = iriString.length(); + int i; + for (i = n - 1; i >= 0; --i) { + char c = iriString.charAt(i); + if (c == '#' || c == '/') { + // We could add ':' here, it is used as a separator in URNs. + // But it is a multiple use character and always present in the scheme name. + // This is a fast-track guess so don't try guessing based on ':'. + break; } } + String result = i >= 0 ? 
iriString.substring(0, i + 1) : null; + return result; + } + + private String performPrefixLookup(String iriStr) { + String prefix = null; + String iriForPrefix = getPossibleKey(iriStr); + // Try fast track first - if it produces a hit then + // no overhead writing to the cache is needed + // The drawback is that we do not necessarily get the longest prefix + if (iriForPrefix != null) { + prefix = iriToPrefixMap.get(iriForPrefix); + } + + // If no solution yet then search for longest prefix + if (prefix == null) { + prefix = cachedPrefixLookup(iriStr).orElse(null); + } + return prefix; + } + + private Optional cachedPrefixLookup(String iri) { + if (cacheVersion != generation) { + cache.invalidateAll(); + cacheVersion = generation; + } + + Optional prefix; + try { + prefix = cache.get(iri, () -> Optional.ofNullable(uncachedPrefixLookup(iri))); + } catch (ExecutionException e) { + throw new RuntimeException("Unexpected failure during cache lookup", e); + } + return prefix; + } + + private String uncachedPrefixLookup(String iriStr) { + String prefix = iriToPrefixTrie.longestMatch(iriStr); + return prefix; + } + + private static void runWithLock(Lock lock, Runnable runnable) { + lock.lock(); + try { + runnable.run(); + } finally { + lock.unlock(); + } + } + + private static T calcWithLock(Lock lock, Supplier supplier) { + T result; + lock.lock(); + try { + result = supplier.get(); + } finally { + lock.unlock(); + } + return result; } } diff --git a/jena-arq/src/main/java/org/apache/jena/riot/system/PrefixMapStdOrig.java b/jena-arq/src/main/java/org/apache/jena/riot/system/PrefixMapStdOrig.java deleted file mode 100644 index 3f923f21ce9..00000000000 --- a/jena-arq/src/main/java/org/apache/jena/riot/system/PrefixMapStdOrig.java +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.riot.system; - -import static org.apache.jena.riot.system.PrefixLib.canonicalPrefix; -import static org.apache.jena.riot.system.PrefixLib.isSafeLocalPart; - -import java.util.Collections ; -import java.util.Map ; -import java.util.Objects; -import java.util.concurrent.ConcurrentHashMap; - -import org.apache.jena.atlas.lib.Pair ; - -/** - * In-memory implementation of a {@link PrefixMap}. - *

- * This also provides fast URI to prefix name calculation suitable for output. For - * output, calculating possible prefix names from a URI happens on every URI so this - * operations needs to be efficient. Normally, a prefix map is "prefix to URI" and - * the abbreviation is a reverse lookup, which is a scan of the value of the map. - * This class keeps a reverse lookup map of URI to prefix which combined with a fast, - * approximate for determining the split point exploiting the most common use cases, - * provides efficient abbreviation. - *

- * Usage for abbreviation: call - * {@linkplain PrefixMapFactory#createForOutput(PrefixMap)} which copies the argument - * prefix map into an instance of this class, setting up the reverse lookup. This - * copy is cheaper than repeated reverse lookups would be. - */ -public class PrefixMapStdOrig extends PrefixMapBase { - // Expansion map - final Map prefixes = new ConcurrentHashMap<>(); - - // Immutable view of prefixes - private final Map prefixes2 = Collections.unmodifiableMap(prefixes); - - // Abbreviation map used for common cases. - // This keeps the URI->prefix mappings for a computed guess at the answer, before - // resorting to a full search. See abbrev(String) below. - private final Map uriToPrefix = new ConcurrentHashMap<>(); - - /** - * Creates a new empty prefix mapping - */ - public PrefixMapStdOrig() {} - - /** - * Creates a new prefix mapping copied from an existing map - * @param prefixMap Prefix Map - */ - public PrefixMapStdOrig(PrefixMap prefixMap) { - Objects.requireNonNull(prefixMap); - prefixes.putAll(prefixMap.getMapping()); - } - - @Override - public Map getMapping() { - return prefixes2; - } - - @Override - public String get(String prefix) { - Objects.requireNonNull(prefix); - prefix = canonicalPrefix(prefix); - return prefixes.get(prefix); - } - - @Override - public void add(String prefix, String iri) { - Objects.requireNonNull(prefix); - Objects.requireNonNull(iri); - prefix = canonicalPrefix(prefix); - String oldURI = prefixes.get(prefix); - if ( oldURI != null ) - uriToPrefix.remove(oldURI); - prefixes.put(prefix, iri); - uriToPrefix.put(iri.toString(), prefix) ; - } - - @Override - public void delete(String prefix) { - Objects.requireNonNull(prefix); - prefix = canonicalPrefix(prefix); - prefixes.remove(prefix); - // Remove from the abbreviation map. 
- uriToPrefix.values().remove(prefix); - } - - @Override - public void clear() { - prefixes.clear() ; - } - - @Override - public boolean containsPrefix(String prefix) { - Objects.requireNonNull(prefix); - prefix = canonicalPrefix(prefix); - return prefixes.containsKey(prefix); - } - - @Override - public String abbreviate(String uriStr) { - Objects.requireNonNull(uriStr); - Pair p = abbrev(uriStr); - if (p == null) - return null; - return p.getLeft() + ":" + p.getRight(); - } - - // This is thread safe (does not crash) - it is not thread-consistent (answer - // uncertain if the prefix mappings are in flux). - @Override - public Pair abbrev(String uriStr) { - Objects.requireNonNull(uriStr); - // Fast path. - // Look for a prefix by URI ending "#" or "/" - // then look for that as a known prefix. - String candidate = getPossibleKey(uriStr); - if ( candidate != null ) { - String uriForPrefix = uriToPrefix.get(candidate); - if ( uriForPrefix != null ) { - // Fast track. - String ln = uriStr.substring(candidate.length()); - if ( isSafeLocalPart(ln) ) - return Pair.create(uriForPrefix, ln); - } - } - // Not in the uri -> prefix map. Crunch it. - return PrefixLib.abbrev(prefixes, uriStr, true); - } - - /** - * Takes a guess for the namespace URI string to use in abbreviation. - * Finds the part of the IRI string before the last '#' or '/'. - * - * @param iriString String string - * @return String or null - */ - protected static String getPossibleKey(String iriString) { - int index = iriString.lastIndexOf('#'); - if (index > -1) - return iriString.substring(0, index + 1); - index = iriString.lastIndexOf('/'); - if (index > -1) - return iriString.substring(0, index + 1); - // We could add ':' here, it is used as a separator in URNs. - // But it is a multiple use character and always present in the scheme name. - // This is a fast-track guess so don't try guessing based on ':'. 
- return null; - } - - @Override - public boolean isEmpty() { - return prefixes.isEmpty(); - } - - @Override - public int size() { - return prefixes.size(); - } -}