Skip to content

Commit

Permalink
Merge pull request #210 from CDRH/elasticsearch_upgrade
Browse files Browse the repository at this point in the history
Elasticsearch upgrade
  • Loading branch information
techgique authored Mar 23, 2023
2 parents 2df4dde + f982173 commit 8a99138
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 58 deletions.
65 changes: 33 additions & 32 deletions lib/config/es_api_schemas/2.0.yml
Original file line number Diff line number Diff line change
@@ -1,37 +1,38 @@
# compatible with Apium v2.0
settings:
analysis:
char_filter:
escapes:
type: mapping
mappings:
- "<em> => "
- "</em> => "
- "<u> => "
- "</u> => "
- "<strong> => "
- "</strong> => "
- "- => "
- "& => "
- ": => "
- "; => "
- ", => "
- ". => "
- "$ => "
- "@ => "
- "~ => "
- "\" => "
- "' => "
- "[ => "
- "] => "
normalizer:
keyword_normalized:
type: custom
char_filter:
- escapes
filter:
- asciifolding
- lowercase
settings:
analysis:
char_filter:
escapes:
type: mapping
mappings:
- "<em> => "
- "</em> => "
- "<u> => "
- "</u> => "
- "<strong> => "
- "</strong> => "
- "- => "
- "& => "
- ": => "
- "; => "
- ", => "
- ". => "
- "$ => "
- "@ => "
- "~ => "
- "\" => "
- "' => "
- "[ => "
- "] => "
normalizer:
keyword_normalized:
type: custom
char_filter:
- escapes
filter:
- asciifolding
- lowercase
mappings:
properties:
identifier:
Expand Down
6 changes: 3 additions & 3 deletions lib/datura/elasticsearch/alias.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def self.add
{ add: { alias: ali, index: idx } }
]
}
RestClient.post(base_url, data.to_json, { content_type: :json }) { |res, req, result|
RestClient.post(base_url, data.to_json, @auth_header.merge({ content_type: :json })) { |res, req, result|
if result.code == "200"
puts res
puts "Successfully added alias #{ali}. Current alias list:"
Expand All @@ -40,15 +40,15 @@ def self.delete

url = File.join(options["es_path"], idx, "_alias", ali)

res = JSON.parse(RestClient.delete(url))
res = JSON.parse(RestClient.delete(url, @auth_header))
puts JSON.pretty_generate(res)
list
end

# Fetches the cluster's alias listing from "<es_path>/_aliases" and returns it
# as a pretty-printed JSON string.
#
# @return [String] pretty-printed JSON of the parsed response body
def self.list
  options = Datura::Options.new({}).all
  # Build the auth header from the loaded options (same approach clear_all
  # uses) so this request is authenticated like the other alias calls.
  auth_header = Datura::Helpers.construct_auth_header(options)

  res = RestClient.get(File.join(options["es_path"], "_aliases"), auth_header)
  JSON.pretty_generate(JSON.parse(res))
end


Expand Down
4 changes: 2 additions & 2 deletions lib/datura/elasticsearch/data.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def self.clear_all(options)
if confirm == "Yes I'm sure"
url = File.join(options["es_path"], options["es_index"], "_doc", "_delete_by_query?pretty=true")
json = { "query" => { "match_all" => {} } }
RestClient.post(url, json.to_json, { content_type: :json }) { |res, req, result|
RestClient.post(url, json.to_json, @auth_header.merge({ content_type: :json })) { |res, req, result|
if result.code == "200"
puts res
else
Expand All @@ -66,7 +66,7 @@ def self.clear_index(options)

if confirmation
data = self.build_clear_data(options)
RestClient.post(url, data.to_json, { content_type: :json }) { |res, req, result|
RestClient.post(url, data.to_json, @auth_header.merge({ content_type: :json })) { |res, req, result|
if result.code == "200"
puts res
else
Expand Down
45 changes: 25 additions & 20 deletions lib/datura/elasticsearch/index.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
require "json"
require "rest-client"
require "yaml"
require "base64"

require_relative "./../elasticsearch.rb"

Expand All @@ -21,10 +22,11 @@ def initialize(options = nil, schema_mapping: false)

@index_url = File.join(@options["es_path"], @options["es_index"])
@pretty_url = "#{@index_url}?pretty=true"
@mapping_url = File.join(@index_url, "_mapping", "_doc?pretty=true")
@mapping_url = File.join(@index_url, "_mapping?pretty=true")

# yaml settings (if exist) and mappings
@requested_schema = YAML.load_file(@options["es_schema"])
@auth_header = Datura::Helpers.construct_auth_header(@options)
# if requested, grab the mapping currently associated with this index
# otherwise wait until after the requested schema is loaded
get_schema_mapping if schema_mapping
Expand All @@ -33,17 +35,16 @@ def initialize(options = nil, schema_mapping: false)
def create
json = @requested_schema["settings"].to_json
puts "Creating ES index for API version #{@options["api_version"]}: #{@pretty_url}"

if json && json != "null"
RestClient.put(@pretty_url, json, { content_type: :json }) { |res, req, result|
RestClient.put(@pretty_url, json, @auth_header.merge({ content_type: :json })) { |res, req, result|
if result.code == "200"
puts res
else
raise "#{result.code} error creating Elasticsearch index: #{res}"
end
}
else
RestClient.put(@pretty_url, nil) { |res, req, result|
RestClient.put(@pretty_url, nil, @auth_header) { |res, req, result|
if result.code == "200"
puts res
else
Expand All @@ -56,15 +57,15 @@ def create
# Deletes the Elasticsearch index addressed by @pretty_url, announcing the
# index name and URL on stdout first.
#
# Raises a RuntimeError carrying the status code and response body when the
# response code is anything other than "200".
def delete
puts "Deleting #{@options["es_index"]} via url #{@pretty_url}"

RestClient.delete(@pretty_url) { |res, req, result|
RestClient.delete(@pretty_url, @auth_header) { |res, req, result|
# result.code is compared as a string, not an integer
if result.code != "200"
raise "#{result.code} error deleting Elasticsearch index: #{res}"
end
}
end

def get_schema
RestClient.get(@mapping_url) { |res, req, result|
RestClient.get(@mapping_url, @auth_header) { |res, req, result|
if result.code == "200"
JSON.parse(res)
else
Expand All @@ -77,13 +78,13 @@ def get_schema_mapping
# if mapping has not already been set, get the schema and manipulate
if !defined?(@schema_mapping)
@schema_mapping = {
"dyanmic" => nil, # /regex|regex/
"dynamic" => nil, # /regex|regex/
"fields" => [], # [ fields ]
"nested" => {} # { field: [ nested_fields ] }
}

schema = get_schema[@options["es_index"]]
doc = schema["mappings"]["_doc"]
doc = schema["mappings"]
doc["properties"].each do |field, value|
@schema_mapping["fields"] << field
if value["type"] == "nested"
Expand All @@ -92,12 +93,14 @@ def get_schema_mapping
end

regex_pieces = []
doc["dynamic_templates"].each do |template|
mapping = template.map { |k,v| v["match"] }.first
# dynamic fields are listed like *_k and will need
# to be converted to ^.*_k$, then combined into a mega-regex
es_match = mapping.sub("*", ".*")
regex_pieces << es_match
if doc["dynamic_templates"]
doc["dynamic_templates"].each do |template|
mapping = template.map { |k,v| v["match"] }.first
# dynamic fields are listed like *_k and will need
# to be converted to ^.*_k$, then combined into a mega-regex
es_match = mapping.sub("*", ".*")
regex_pieces << es_match
end
end
if !regex_pieces.empty?
regex_joined = regex_pieces.join("|")
Expand All @@ -111,7 +114,7 @@ def set_schema
json = @requested_schema["mappings"].to_json

puts "Setting schema: #{@mapping_url}"
RestClient.put(@mapping_url, json, { content_type: :json }) { |res, req, result|
RestClient.put(@mapping_url, json, @auth_header.merge({ content_type: :json })) { |res, req, result|
if result.code == "200"
puts res
else
Expand Down Expand Up @@ -206,9 +209,10 @@ def self.clear_all(options)
puts "Type: 'Yes I'm sure'"
confirm = STDIN.gets.chomp
if confirm == "Yes I'm sure"
url = File.join(options["es_path"], options["es_index"], "_doc", "_delete_by_query?pretty=true")
url = File.join(options["es_path"], options["es_index"], "_delete_by_query?pretty=true")
auth_header = Datura::Helpers.construct_auth_header(options)
json = { "query" => { "match_all" => {} } }
RestClient.post(url, json.to_json, { content_type: :json }) { |res, req, result|
RestClient.post(url, json.to_json, auth_header.merge({ content_type: :json })) { |res, req, result|
if result.code == "200"
puts res
else
Expand All @@ -222,13 +226,14 @@ def self.clear_all(options)
end

def self.clear_index(options)
url = File.join(options["es_path"], options["es_index"], "_doc", "_delete_by_query?pretty=true")
url = File.join(options["es_path"], options["es_index"], "_delete_by_query?pretty=true")
confirmation = self.confirm_clear(options, url)

if confirmation
data = self.build_clear_data(options)
RestClient.post(url, data.to_json, { content_type: :json }) { |res, req, result|
if result.code == "200"
auth_header = Datura::Helpers.construct_auth_header(options)
RestClient.post(url, data.to_json, auth_header.merge({content_type: :json })) { |res, req, result|
if result.code == "200" || result.code == "201"
puts res
else
raise "#{result.code} error when clearing index: #{res}"
Expand Down
3 changes: 2 additions & 1 deletion lib/datura/file_type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def initialize(location, options)
@out_html = File.join(output, "html")
@out_iiif = File.join(output, "iiif")
@out_solr = File.join(output, "solr")
@auth_header = Datura::Helpers.construct_auth_header(options)
Datura::Helpers.make_dirs(@out_es, @out_html, @out_iiif, @out_solr)
# script locations set in child classes
end
Expand Down Expand Up @@ -68,7 +69,7 @@ def post_es(es)
# NOTE: If you need to do partial updates rather than replacement of doc
# you will need to add _update at the end of this URL
begin
RestClient.put("#{es.index_url}/_doc/#{id}", doc.to_json, {:content_type => :json } )
RestClient.put("#{es.index_url}/_doc/#{id}", doc.to_json, @auth_header.merge({:content_type => :json }) )
rescue => e
error = "Error transforming or posting to ES for #{self.filename(false)}: #{e}"
end
Expand Down
6 changes: 6 additions & 0 deletions lib/datura/helpers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -171,4 +171,10 @@ def self.should_update?(file, since_date=nil)
end
end

# Builds the HTTP Basic "Authorization" header for Elasticsearch requests.
#
# @param options [Hash] reads the "es_user" and "es_password" keys
# @return [Hash] single-entry hash: { "Authorization" => "Basic <base64>" },
#   suitable for merging with other request headers
def self.construct_auth_header(options)
  credentials = "#{options["es_user"]}:#{options["es_password"]}"
  # strict_encode64 emits no line feeds. encode64 appends "\n" and wraps the
  # output every 60 characters, which would place newlines inside the header.
  { "Authorization" => "Basic #{Base64.strict_encode64(credentials)}" }
end

end

0 comments on commit 8a99138

Please sign in to comment.