From 7a5c00e385a45487cebbe53d14aece43b1f5ca52 Mon Sep 17 00:00:00 2001 From: Prudhvi Godithi Date: Sat, 26 Oct 2024 11:53:11 -0700 Subject: [PATCH] Add custom synonym_analyzer Signed-off-by: Prudhvi Godithi --- .../gradle/testclusters/OpenSearchNode.java | 12 +++++++---- .../common/CommonAnalysisModulePlugin.java | 19 ++++++++++++++++-- .../SynonymGraphTokenFilterFactory.java | 5 +++-- .../common/SynonymTokenFilterFactory.java | 20 +++++++++++++++++-- .../indices/analysis/AnalysisModule.java | 7 ++++++- .../opensearch/plugins/AnalysisPlugin.java | 9 +++++++++ 6 files changed, 61 insertions(+), 11 deletions(-) diff --git a/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java b/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java index cd22560af9a96..bb409c2afd871 100644 --- a/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java +++ b/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java @@ -1216,14 +1216,18 @@ private void createConfiguration() { ); final List configFiles; - try (Stream stream = Files.list(getDistroDir().resolve("config"))) { + try (Stream stream = Files.walk(getDistroDir().resolve("config"))) { configFiles = stream.collect(Collectors.toList()); } logToProcessStdout("Copying additional config files from distro " + configFiles); for (Path file : configFiles) { - Path dest = configFile.getParent().resolve(file.getFileName()); - if (Files.exists(dest) == false) { - Files.copy(file, dest); + Path relativePath = getDistroDir().resolve("config").relativize(file); + Path dest = configFile.getParent().resolve(relativePath); + if (Files.isDirectory(file)) { + Files.createDirectories(dest); + } else if (Files.exists(dest) == false) { + Files.createDirectories(dest.getParent()); + Files.copy(file, dest); } } } catch (IOException e) { diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java 
b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java index f14e499081ce9..028932c645928 100644 --- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java +++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java @@ -146,6 +146,7 @@ import org.opensearch.index.analysis.PreConfiguredTokenizer; import org.opensearch.index.analysis.TokenFilterFactory; import org.opensearch.index.analysis.TokenizerFactory; +import org.opensearch.indices.analysis.AnalysisModule; import org.opensearch.indices.analysis.AnalysisModule.AnalysisProvider; import org.opensearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy; import org.opensearch.plugins.AnalysisPlugin; @@ -157,9 +158,11 @@ import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; +import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.TreeMap; @@ -176,6 +179,7 @@ public class CommonAnalysisModulePlugin extends Plugin implements AnalysisPlugin private final SetOnce scriptService = new SetOnce<>(); + @Override public Collection createComponents( Client client, @@ -194,6 +198,7 @@ public Collection createComponents( return Collections.emptyList(); } + @Override public List> getContexts() { return Collections.singletonList(AnalysisPredicateScript.CONTEXT); @@ -332,8 +337,6 @@ public Map> getTokenFilters() { filters.put("sorani_normalization", SoraniNormalizationFilterFactory::new); filters.put("stemmer_override", requiresAnalysisSettings(StemmerOverrideTokenFilterFactory::new)); filters.put("stemmer", StemmerTokenFilterFactory::new); - filters.put("synonym", requiresAnalysisSettings(SynonymTokenFilterFactory::new)); - filters.put("synonym_graph", 
requiresAnalysisSettings(SynonymGraphTokenFilterFactory::new)); filters.put("trim", TrimTokenFilterFactory::new); filters.put("truncate", requiresAnalysisSettings(TruncateTokenFilterFactory::new)); filters.put("unique", UniqueTokenFilterFactory::new); @@ -343,6 +346,18 @@ public Map> getTokenFilters() { return filters; } + @Override + public Map> getTokenFilters(AnalysisModule analysisModule) { + Map> filters = getTokenFilters(); + filters.put("synonym", requiresAnalysisSettings((indexSettings, environment, name, settings) -> + new SynonymTokenFilterFactory(indexSettings, environment, name, settings, analysisModule.getAnalysisRegistry()) + )); + filters.put("synonym_graph", requiresAnalysisSettings((indexSettings, environment, name, settings) -> + new SynonymGraphTokenFilterFactory(indexSettings, environment, name, settings, analysisModule.getAnalysisRegistry()) + )); + return filters; + } + @Override public Map> getCharFilters() { Map> filters = new TreeMap<>(); diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymGraphTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymGraphTokenFilterFactory.java index fed959108c411..d1f1ebfd906f6 100644 --- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymGraphTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymGraphTokenFilterFactory.java @@ -40,6 +40,7 @@ import org.opensearch.env.Environment; import org.opensearch.index.IndexSettings; import org.opensearch.index.analysis.AnalysisMode; +import org.opensearch.index.analysis.AnalysisRegistry; import org.opensearch.index.analysis.CharFilterFactory; import org.opensearch.index.analysis.TokenFilterFactory; import org.opensearch.index.analysis.TokenizerFactory; @@ -49,8 +50,8 @@ public class SynonymGraphTokenFilterFactory extends SynonymTokenFilterFactory { - SynonymGraphTokenFilterFactory(IndexSettings 
indexSettings, Environment env, String name, Settings settings) { - super(indexSettings, env, name, settings); + SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings, AnalysisRegistry analysisRegistry) { + super(indexSettings, env, name, settings, analysisRegistry); } @Override diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java index 01a65e87d7466..c1ded45f876b7 100644 --- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java @@ -48,7 +48,9 @@ import org.opensearch.index.analysis.CustomAnalyzer; import org.opensearch.index.analysis.TokenFilterFactory; import org.opensearch.index.analysis.TokenizerFactory; +import org.opensearch.index.analysis.AnalysisRegistry; +import java.io.IOException; import java.io.Reader; import java.io.StringReader; import java.util.List; @@ -64,8 +66,10 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory { protected final Settings settings; protected final Environment environment; protected final AnalysisMode analysisMode; + private final String synonymAnalyzer; + private final AnalysisRegistry analysisRegistry; - SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { + SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings, AnalysisRegistry analysisRegistry) { super(indexSettings, name, settings); this.settings = settings; @@ -83,6 +87,8 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory { boolean updateable = settings.getAsBoolean("updateable", false); this.analysisMode = updateable ? 
AnalysisMode.SEARCH_TIME : AnalysisMode.ALL; this.environment = env; + this.synonymAnalyzer = settings.get("synonym_analyzer", null); + this.analysisRegistry = analysisRegistry; } @Override @@ -137,6 +143,17 @@ Analyzer buildSynonymAnalyzer( List tokenFilters, Function allFilters ) { + if (synonymAnalyzer != null) { + Analyzer customSynonymAnalyzer; + try { + customSynonymAnalyzer = analysisRegistry.getAnalyzer(synonymAnalyzer); + } catch (IOException e) { + throw new java.io.UncheckedIOException("Failed to load synonym_analyzer [" + synonymAnalyzer + "]", e); + } + if (customSynonymAnalyzer != null) { + return customSynonymAnalyzer; + } + } return new CustomAnalyzer( tokenizer, charFilters.toArray(new CharFilterFactory[0]), @@ -177,5 +194,4 @@ Reader getRulesFromSettings(Environment env) { } return rulesReader; } - } diff --git a/server/src/main/java/org/opensearch/indices/analysis/AnalysisModule.java b/server/src/main/java/org/opensearch/indices/analysis/AnalysisModule.java index 0926d497087d1..dbb3035a18f74 100644 --- a/server/src/main/java/org/opensearch/indices/analysis/AnalysisModule.java +++ b/server/src/main/java/org/opensearch/indices/analysis/AnalysisModule.java @@ -165,7 +165,12 @@ public boolean requiresAnalysisSettings() { ) ); - tokenFilters.extractAndRegister(plugins, AnalysisPlugin::getTokenFilters); + for (AnalysisPlugin plugin : plugins) { + Map> filters = plugin.getTokenFilters(this); + for (Map.Entry> entry : filters.entrySet()) { + tokenFilters.register(entry.getKey(), entry.getValue()); + } + } return tokenFilters; } diff --git a/server/src/main/java/org/opensearch/plugins/AnalysisPlugin.java b/server/src/main/java/org/opensearch/plugins/AnalysisPlugin.java index 53dcc916b244f..a7c4604a30553 100644 --- a/server/src/main/java/org/opensearch/plugins/AnalysisPlugin.java +++ b/server/src/main/java/org/opensearch/plugins/AnalysisPlugin.java @@ -47,6 +47,7 @@ import org.opensearch.index.analysis.PreConfiguredTokenizer; import org.opensearch.index.analysis.TokenFilterFactory; import 
org.opensearch.index.analysis.TokenizerFactory; +import org.opensearch.indices.analysis.AnalysisModule; import org.opensearch.indices.analysis.AnalysisModule.AnalysisProvider; import java.io.IOException; @@ -84,6 +85,14 @@ default Map> getCharFilters() { return emptyMap(); } + /** + * Override to add additional {@link TokenFilter}s whose factories need access to the {@link AnalysisModule}. + * The default implementation delegates to {@link #getTokenFilters()} so that existing plugins keep working unchanged. + */ + default Map> getTokenFilters(AnalysisModule analysisModule) { + return getTokenFilters(); + } + /** * Override to add additional {@link TokenFilter}s. See {@link #requiresAnalysisSettings(AnalysisProvider)} * how to on get the configuration from the index.