From c761d5c198f42aafadf4e9478bbb03170a549af3 Mon Sep 17 00:00:00 2001 From: Jarrod Baker Date: Fri, 8 Jul 2022 09:25:25 +0100 Subject: [PATCH 1/2] Update evidence index with explicit type mappings Without explicit type mappings, Elasticsearch (ES) infers the type of new document fields. A small number of evidence objects have values which do not fit into a `float`. Since the vast bulk of entries do fit into a `float`, ES infers that to be the type. This causes ~50 entries to be discarded due to a type mismatch. This bug is non-deterministic: there is always a chance that one of the 50 (out of ~650k) will be loaded in the first batch, in which case the type inference works correctly. Resolves opentargets/platform#1687 --- scripts/ES/index_settings_evidence.json | 32 +++++++++++++++++++ .../modules/posvm/scripts/load_all_data.sh | 2 +- .../modules/posvm/scripts/load_data.sh | 1 + 3 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 scripts/ES/index_settings_evidence.json diff --git a/scripts/ES/index_settings_evidence.json b/scripts/ES/index_settings_evidence.json new file mode 100644 index 0000000..6980cea --- /dev/null +++ b/scripts/ES/index_settings_evidence.json @@ -0,0 +1,32 @@ +{ + "mappings": { + "dynamic_templates": [ + { + "nesteds": { + "match_mapping_type": "object", + "match": "facet_*", + "mapping": { + "type": "nested" + } + } + } + ], + "properties": { + "oddsRatio": { + "type": "double" + }, + "oddsRatioConfidenceIntervalLower": { + "type": "double" + }, + "oddsRatioConfidenceIntervalUpper": { + "type": "double" + } + } + }, + "settings": { + "index": { + "number_of_replicas": 0, + "number_of_shards": 5 + } + } +} \ No newline at end of file diff --git a/terraform_create_images/modules/posvm/scripts/load_all_data.sh b/terraform_create_images/modules/posvm/scripts/load_all_data.sh index ca6cde9..4fc85d6 100644 --- a/terraform_create_images/modules/posvm/scripts/load_all_data.sh +++ b/terraform_create_images/modules/posvm/scripts/load_all_data.sh @@ -44,7 +44,7 
@@ do export ID='id' export INDEX_NAME="${token}" export INPUT="${full_folder}" - export INDEX_SETTINGS=$PREFIX_DATA/index_settings.json + export INDEX_SETTINGS=$PREFIX_DATA/index_settings_evidence.json /tmp/load_json_esbulk.sh done diff --git a/terraform_create_images/modules/posvm/scripts/load_data.sh b/terraform_create_images/modules/posvm/scripts/load_data.sh index 829bf47..7f66d54 100755 --- a/terraform_create_images/modules/posvm/scripts/load_data.sh +++ b/terraform_create_images/modules/posvm/scripts/load_data.sh @@ -54,6 +54,7 @@ sudo chmod 555 load_json_esbulk.sh sudo wget -O /tmp/data/index_settings.json https://raw.githubusercontent.com/opentargets/platform-etl-backend/master/elasticsearch/index_settings.json sudo wget -O /tmp/data/index_settings_search_known_drugs.json https://raw.githubusercontent.com/opentargets/platform-etl-backend/master/elasticsearch/index_settings_search_known_drugs.json sudo wget -O /tmp/data/index_settings_search.json https://raw.githubusercontent.com/opentargets/platform-etl-backend/master/elasticsearch/index_settings_search.json +sudo wget -O /tmp/data/index_settings_evidence.json https://raw.githubusercontent.com/opentargets/platform-etl-backend/master/elasticsearch/evidence_settings_search.json export ES=${ELASTICSEARCH_URI}:9200 export PREFIX_DATA=/tmp/data/ From 3e748e1bc4ce805aee6ef501d113f94883346817 Mon Sep 17 00:00:00 2001 From: Jarrod Baker Date: Fri, 8 Jul 2022 10:44:01 +0100 Subject: [PATCH 2/2] Update evidence index settings file name --- terraform_create_images/modules/posvm/scripts/load_data.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform_create_images/modules/posvm/scripts/load_data.sh b/terraform_create_images/modules/posvm/scripts/load_data.sh index 7f66d54..b4e6ed2 100755 --- a/terraform_create_images/modules/posvm/scripts/load_data.sh +++ b/terraform_create_images/modules/posvm/scripts/load_data.sh @@ -54,7 +54,7 @@ sudo chmod 555 load_json_esbulk.sh sudo wget -O 
/tmp/data/index_settings.json https://raw.githubusercontent.com/opentargets/platform-etl-backend/master/elasticsearch/index_settings.json sudo wget -O /tmp/data/index_settings_search_known_drugs.json https://raw.githubusercontent.com/opentargets/platform-etl-backend/master/elasticsearch/index_settings_search_known_drugs.json sudo wget -O /tmp/data/index_settings_search.json https://raw.githubusercontent.com/opentargets/platform-etl-backend/master/elasticsearch/index_settings_search.json -sudo wget -O /tmp/data/index_settings_evidence.json https://raw.githubusercontent.com/opentargets/platform-etl-backend/master/elasticsearch/evidence_settings_search.json +sudo wget -O /tmp/data/index_settings_evidence.json https://raw.githubusercontent.com/opentargets/platform-etl-backend/master/elasticsearch/index_settings_evidence.json export ES=${ELASTICSEARCH_URI}:9200 export PREFIX_DATA=/tmp/data/