Skip to content

Commit

Permalink
MSEARCH-466 Add char_filter for indexes (#318)
Browse files Browse the repository at this point in the history
* Add char_filter for instance/authority

(cherry picked from commit be43e28)
  • Loading branch information
Shans-Kaluhin committed Dec 6, 2022
1 parent 0da948c commit d18b3b2
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,19 @@
@Component
public class AllTermQueryBuilder extends FulltextQueryBuilder {

private static final String WHITE_SPACE = "\\s+";

@Override
public QueryBuilder getQuery(Object term, String resource, String... fields) {
if (term instanceof String) {
var stringTerm = (String) term;
var terms = stringTerm.split("\\s+");
var terms = stringTerm.split(WHITE_SPACE);

if (terms.length == 1) {
return getMultiMatchQuery(terms[0], fields);
} else {
return getBoolQuery(terms, fields);
}

var boolQuery = boolQuery();
for (var singleTerm : terms) {
boolQuery.must(getMultiMatchQuery(singleTerm, fields));
}
return boolQuery;
}

return getMultiMatchQuery(term, fields);
Expand All @@ -51,4 +50,12 @@ public Set<String> getSupportedComparators() {
/**
 * Creates the multi-match query used for a single search term.
 *
 * <p>The query is built over the supplied fields with the {@code AND} operator and the
 * {@code CROSS_FIELDS} type.
 *
 * @param term value to search for
 * @param fieldNames names of the fields the term is matched against
 * @return the configured multi-match query
 */
private QueryBuilder getMultiMatchQuery(Object term, String... fieldNames) {
  var multiMatch = multiMatchQuery(term, fieldNames);
  return multiMatch.operator(AND).type(CROSS_FIELDS);
}

/**
 * Combines the given terms into a single bool query in which every term must match.
 *
 * <p>Each non-empty term is turned into its own multi-match query over {@code fieldNames} and
 * added as a {@code must} clause. Empty terms are skipped: splitting a value that starts with
 * whitespace on a whitespace pattern yields an empty first element, and a mandatory clause
 * built from an empty string would be required to match alongside the real terms.
 *
 * @param terms search terms, e.g. the whitespace-separated parts of the original value
 * @param fieldNames names of the fields each term is matched against
 * @return bool query containing one {@code must} clause per non-empty term
 */
private QueryBuilder getBoolQuery(String[] terms, String... fieldNames) {
  var boolQuery = boolQuery();
  for (var singleTerm : terms) {
    // An empty term carries no search intent; do not let it constrain the result set.
    if (singleTerm.isEmpty()) {
      continue;
    }
    boolQuery.must(getMultiMatchQuery(singleTerm, fieldNames));
  }
  return boolQuery;
}
}
12 changes: 11 additions & 1 deletion src/main/resources/elasticsearch/index/authority.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,19 @@
"source_analyzer": {
"tokenizer": "icu_tokenizer",
"filter": [ "folio_word_delimiter_graph", "icu_folding" ],
"char_filter": [ "authority_char_filter" ],
"type": "custom"
}
},
"tokenizers": { }
"tokenizers": { },
"char_filter": {
"authority_char_filter": {
"type": "mapping",
"mappings": [
"& => and",
": => \\s+"
]
}
}
}
}
12 changes: 11 additions & 1 deletion src/main/resources/elasticsearch/index/instance.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,19 @@
"source_analyzer": {
"tokenizer": "icu_tokenizer",
"filter": [ "folio_word_delimiter_graph", "icu_folding" ],
"char_filter": [ "instance_char_filter" ],
"type": "custom"
}
},
"tokenizers": { }
"tokenizers": { },
"char_filter": {
"instance_char_filter": {
"type": "mapping",
"mappings": [
"& => and",
": => \\s+"
]
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,9 @@ private static Stream<Arguments> testDataProvider() {
arguments("holdingsIdentifiers all {value}", "e3ff6133-b9a2-4d4c-a1c9-dc1867d4df19"),

//search by multiple different parameters
arguments("(keyword all {value})", "wolves matthew 9781609383657")
arguments("(keyword all {value})", "wolves matthew 9781609383657"),
arguments("(keyword all {value})", "A semantic web primer & wolves"),
arguments("(title all {value})", "A semantic web primer : 0747-0850")
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"id": "5bf370e0-8cca-4d9c-82e4-5170ab2a0a39",
"hrid": "inst000000000022",
"source": "FOLIO",
"title": "A semantic web primer 0747-0850 wolves",
"title": "A semantic web primer :0747-0850 & wolves",
"indexTitle": "Semantic web primer",
"alternativeTitles": [
{
Expand Down

0 comments on commit d18b3b2

Please sign in to comment.