Skip to content

Commit

Permalink
Make match expansion configurable (#150)
Browse files Browse the repository at this point in the history
Change-Id: Ie4eb9098f6e7352918e1fd0d3bf74615d3508e7e
  • Loading branch information
margaretha committed Jun 14, 2024
1 parent 264e10f commit ba307f2
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 5 deletions.
3 changes: 3 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
0.62.6 2024-06-13
- [feature] Make match expansion configurable (closes #150, margaretha)

0.62.5 2024-06-11
- [bugfix] cut primary data according to max values (margaretha, #143)
- [enhancement] restrict match expansion by max token and context
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@

<groupId>de.ids-mannheim.korap.krill</groupId>
<artifactId>Krill</artifactId>
<version>0.62.5</version>
<version>0.62.6</version>
<packaging>jar</packaging>

<name>Krill</name>
Expand Down
8 changes: 5 additions & 3 deletions src/main/java/de/ids_mannheim/korap/response/Match.java
Original file line number Diff line number Diff line change
Expand Up @@ -1117,9 +1117,11 @@ public void expandContextToSpan (String element) {

if (spanContext[0] >= 0
&& spanContext[0] < spanContext[1]) {

int maxExpansionSize = KrillProperties.maxTokenMatchSize
+ KrillProperties.maxTokenContextSize;

int maxExpansionSize = KrillProperties.maxTokenMatchSize;
if (KrillProperties.matchExpansionIncludeContextSize) {
maxExpansionSize += KrillProperties.maxTokenContextSize;
}

// Match needs to be cut!
boolean cutExpansion = false;
Expand Down
6 changes: 6 additions & 0 deletions src/main/java/de/ids_mannheim/korap/util/KrillProperties.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ public class KrillProperties {
public static int maxTokenContextSize = 60;
public static int maxCharContextSize = 500;

public static boolean matchExpansionIncludeContextSize = false;

public static String namedVCPath = "";
public static boolean isTest = false;

Expand Down Expand Up @@ -100,6 +102,10 @@ public static void updateConfigurations (Properties prop) {
isTest = Boolean.parseBoolean(p);

namedVCPath = prop.getProperty("krill.namedVC", "");

String matchExpansion = prop.getProperty(
"krill.match." + "expansion.includeContextSize", "false");
matchExpansionIncludeContextSize = Boolean.parseBoolean(matchExpansion);
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import de.ids_mannheim.korap.response.Result;
import de.ids_mannheim.korap.response.match.MatchIdentifier;
import de.ids_mannheim.korap.response.match.PosIdentifier;
import de.ids_mannheim.korap.util.KrillProperties;
import de.ids_mannheim.korap.util.QueryException;

@RunWith(JUnit4.class)
Expand Down Expand Up @@ -1219,6 +1220,8 @@ public void indexWithFieldInfo () throws IOException, QueryException {

@Test
public void indexCorolaTokensBugReplicated () throws IOException, QueryException {
KrillProperties.matchExpansionIncludeContextSize=false;

KrillIndex ki = new KrillIndex();

ki.addDoc(getClass().getResourceAsStream("/others/corola-bug.json"), false);
Expand All @@ -1239,14 +1242,16 @@ public void indexCorolaTokensBugReplicated () throws IOException, QueryException

String str = km.getSnippetBrackets();
assertTrue(str.contains("[<!>{drukola/l:au:a}"));
assertFalse(str.contains("<!>]"));
assertTrue(str.contains("<!>]"));

km = ki.getMatchInfo("match-Corola-blog/BlogPost/370281_a_371610-p50-51", "tokens", null, null,false, false, true);

// The match needs to be cut on both sides!
str = km.getSnippetBrackets();
assertTrue(str.contains("[<!>{d"));
assertTrue(str.contains("a}<!>]"));

KrillProperties.matchExpansionIncludeContextSize=true;
};


Expand Down
1 change: 1 addition & 0 deletions src/test/resources/krill.properties
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@ krill.index.commit.count = 15
krill.namedVC = queries/collections/named-vcs/
krill.test = true

krill.match.expansion.includeContextSize = true
krill.match.max.token=50
krill.context.max.token=25

0 comments on commit ba307f2

Please sign in to comment.