Skip to content

Commit

Permalink
fix(search-instances): Fix wildcard search for diacritics (#691)
Browse files Browse the repository at this point in the history
* fix(search-instances): Fix wildcard search for diacritics

- Use keyword_icu instead of keyword_lowercase for fulltext fields plain values

Closes: MSEARCH-857

(cherry picked from commit 7473f32)
  • Loading branch information
viacheslavkol committed Nov 13, 2024
1 parent 44b0e3b commit 0c12918
Show file tree
Hide file tree
Showing 9 changed files with 52 additions and 1 deletion.
9 changes: 9 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
## v4.0.1 2024-11-13
### Bug fixes
* Remove shelving order calculation for local call-number types ([MSEARCH-878](https://issues.folio.org/browse/MSEARCH-878))
* Use keyword_icu instead of keyword_lowercase for fulltext fields plain values ([MSEARCH-857](https://folio-org.atlassian.net/browse/MSEARCH-857))

### Tech Debt
* Description ([ISSUE](https://folio-org.atlassian.net/browse/ISSUE))

### Dependencies
* Bump `LIB_NAME` from `OLD_VERSION` to `NEW_VERSION`
* Add `LIB_NAME VERSION`
* Remove `LIB_NAME`

---

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public class PlainFieldDescription extends FieldDescription {

public static final String MULTILANG_FIELD_TYPE = "multilang";
public static final String STANDARD_FIELD_TYPE = "standard";
public static final String PLAIN_FULLTEXT_FIELD_TYPE = "keyword_lowercase";
public static final String PLAIN_FULLTEXT_FIELD_TYPE = "keyword_icu";

public static final Map<String, String> FULLTEXT_FIELD_TYPES = Map.of(
MULTILANG_FIELD_TYPE, PLAIN_FULLTEXT_FIELD_TYPE,
Expand Down
8 changes: 8 additions & 0 deletions src/main/resources/elasticsearch/index/authority.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@
"icu_folding"
],
"type": "custom"
},
"keyword_icu": {
"filter": [
"lowercase",
"trim",
"icu_folding"
],
"type": "custom"
}
},
"analyzer": {
Expand Down
8 changes: 8 additions & 0 deletions src/main/resources/elasticsearch/index/linked-data-hub.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@
"trim"
],
"type": "custom"
},
"keyword_icu": {
"filter": [
"lowercase",
"trim",
"icu_folding"
],
"type": "custom"
}
},
"analyzer": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@
"trim"
],
"type": "custom"
},
"keyword_icu": {
"filter": [
"lowercase",
"trim",
"icu_folding"
],
"type": "custom"
}
},
"analyzer": {
Expand Down
8 changes: 8 additions & 0 deletions src/main/resources/elasticsearch/index/linked-data-work.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@
"trim"
],
"type": "custom"
},
"keyword_icu": {
"filter": [
"lowercase",
"trim",
"icu_folding"
],
"type": "custom"
}
},
"analyzer": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,10 @@ private static Stream<Arguments> testDataProvider() {
arguments("title all {value}", "deja vu"),
arguments("title all {value}", "déjà vu"),
arguments("title all {value}", "Algérie"),
arguments("title all {value}", "Der Preis der Verfuhrung"),
arguments("title all {value}", "Der Preis der Verführung"),
arguments("title all {value}", "Der Preis der Verfuhrung*"),
arguments("title all {value}", "Der Preis der Verführung*"),
// the plain "e" in the query below should match "e" followed by U+0301 (Combining Acute Accent) in the indexed title
arguments("title all {value}", "algerie"),

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@
"alternativeTitleTypeId": null,
"alternativeTitle": "Pang'ok bangk'asyurangsŭ",
"authorityId": "9d968396-0cce-4e9f-8867-c4d04c01f535"
},
{
"alternativeTitle": "Der Preis der Verführung : die gesetzliche Schadensersatzklage wegen Ehebruchs in England zwischen 1857 und 1970 / Eike Götz Hosemann."
}
],
"identifiers": [
Expand Down
3 changes: 3 additions & 0 deletions src/test/resources/samples/semantic-web-primer/instance.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
{
"alternativeTitle": "Pang'ok bangk'asyurangsŭ",
"authorityId": "9d968396-0cce-4e9f-8867-c4d04c01f535"
},
{
"alternativeTitle": "Der Preis der Verführung : die gesetzliche Schadensersatzklage wegen Ehebruchs in England zwischen 1857 und 1970 / Eike Götz Hosemann."
}
],
"editions": [],
Expand Down

0 comments on commit 0c12918

Please sign in to comment.