diff --git a/Changes b/Changes index 37d2ffc3..e0b9400b 100644 --- a/Changes +++ b/Changes @@ -1,3 +1,6 @@ +0.62.6 2024-06-13 + - [feature] Make match expansion configurable (close #150, margaretha) + 0.62.5 2024-06-11 - [bugfix] cut primary data according to max values (margaretha, #143) - [enhancement] restrict match expansion by max token and context diff --git a/pom.xml b/pom.xml index dbfe7821..77435b0d 100644 --- a/pom.xml +++ b/pom.xml @@ -35,7 +35,7 @@ de.ids-mannheim.korap.krill Krill - 0.62.5 + 0.62.6 jar Krill diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java index bf14ca2d..fa187403 100644 --- a/src/main/java/de/ids_mannheim/korap/response/Match.java +++ b/src/main/java/de/ids_mannheim/korap/response/Match.java @@ -1117,9 +1117,11 @@ public void expandContextToSpan (String element) { if (spanContext[0] >= 0 && spanContext[0] < spanContext[1]) { - - int maxExpansionSize = KrillProperties.maxTokenMatchSize - + KrillProperties.maxTokenContextSize; + + int maxExpansionSize = KrillProperties.maxTokenMatchSize; + if (KrillProperties.matchExpansionIncludeContextSize) { + maxExpansionSize += KrillProperties.maxTokenContextSize; + } // Match needs to be cutted! boolean cutExpansion = false; diff --git a/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java b/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java index 1ebc4490..6c6d5b48 100644 --- a/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java +++ b/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java @@ -23,6 +23,8 @@ public class KrillProperties { public static int maxTokenContextSize = 60; public static int maxCharContextSize = 500; + public static boolean matchExpansionIncludeContextSize = false; + public static String namedVCPath = ""; public static boolean isTest = false; @@ -100,6 +102,10 @@ public static void updateConfigurations (Properties prop) { isTest = Boolean.parseBoolean(p); namedVCPath = prop.getProperty("krill.namedVC", ""); + + String matchExpansion = prop.getProperty( + "krill.match." + "expansion.includeContextSize", "false"); + matchExpansionIncludeContextSize = Boolean.parseBoolean(matchExpansion); } diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java index 1bf26776..97aa4292 100644 --- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java +++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java @@ -30,6 +30,7 @@ import de.ids_mannheim.korap.response.Result; import de.ids_mannheim.korap.response.match.MatchIdentifier; import de.ids_mannheim.korap.response.match.PosIdentifier; +import de.ids_mannheim.korap.util.KrillProperties; import de.ids_mannheim.korap.util.QueryException; @RunWith(JUnit4.class) @@ -1219,6 +1220,8 @@ public void indexWithFieldInfo () throws IOException, QueryException { @Test public void indexCorolaTokensBugReplicated () throws IOException, QueryException { + KrillProperties.matchExpansionIncludeContextSize=false; + KrillIndex ki = new KrillIndex(); ki.addDoc(getClass().getResourceAsStream("/others/corola-bug.json"), false); @@ -1239,7 +1242,7 @@ public void indexCorolaTokensBugReplicated () throws IOException, QueryException String str = km.getSnippetBrackets(); assertTrue(str.contains("[{drukola/l:au:a}")); - assertFalse(str.contains("]")); + assertTrue(str.contains("]")); km = ki.getMatchInfo("match-Corola-blog/BlogPost/370281_a_371610-p50-51", "tokens", null, null,false, false, true); @@ -1247,6 +1250,8 @@ public void indexCorolaTokensBugReplicated () throws IOException, QueryException str = km.getSnippetBrackets(); assertTrue(str.contains("[{d")); assertTrue(str.contains("a}]")); + + KrillProperties.matchExpansionIncludeContextSize=true; }; diff --git a/src/test/resources/krill.properties b/src/test/resources/krill.properties index 3714c0c3..fc26a131 100644 --- a/src/test/resources/krill.properties +++ b/src/test/resources/krill.properties @@ -6,5 +6,6 @@ krill.index.commit.count = 15 krill.namedVC = queries/collections/named-vcs/ krill.test = true +krill.match.expansion.includeContextSize = true krill.match.max.token=50 krill.context.max.token=25