From 0332408c9240af8864cc5ba19340a73d0db718fb Mon Sep 17 00:00:00 2001
From: Prudhvi Godithi <pgodithi@amazon.com>
Date: Sat, 26 Oct 2024 11:53:11 -0700
Subject: [PATCH 1/2] Add custom synonym_analyzer

Signed-off-by: Prudhvi Godithi <pgodithi@amazon.com>
---
 .../gradle/testclusters/OpenSearchNode.java   | 12 ++-
 .../common/CommonAnalysisModulePlugin.java    | 33 ++++++-
 .../SynonymGraphTokenFilterFactory.java       | 11 ++-
 .../common/SynonymTokenFilterFactory.java     | 28 +++++-
 .../common/CommonAnalysisFactoryTests.java    | 42 ++++++++
 .../common/SynonymsAnalysisTests.java         | 97 +++++++++++++++++--
 .../indices/analysis/AnalysisModule.java      |  7 +-
 .../opensearch/plugins/AnalysisPlugin.java    |  9 ++
 .../indices/analysis/AnalysisModuleTests.java | 56 +++++++++++
 .../analysis/AnalysisFactoryTestCase.java     | 11 +++
 10 files changed, 288 insertions(+), 18 deletions(-)
diff --git a/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java b/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java
index cd22560af9a96..bb409c2afd871 100644
--- a/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java
+++ b/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java
@@ -1216,14 +1216,18 @@ private void createConfiguration() {
             );
 
             final List<Path> configFiles;
-            try (Stream<Path> stream = Files.list(getDistroDir().resolve("config"))) {
+            try (Stream<Path> stream = Files.walk(getDistroDir().resolve("config"))) {
                 configFiles = stream.collect(Collectors.toList());
             }
             logToProcessStdout("Copying additional config files from distro " + configFiles);
             for (Path file : configFiles) {
-                Path dest = configFile.getParent().resolve(file.getFileName());
-                if (Files.exists(dest) == false) {
-                    Files.copy(file, dest);
+                Path relativePath = getDistroDir().resolve("config").relativize(file);
+                Path dest = configFile.getParent().resolve(relativePath);
+                if (Files.isDirectory(file)) {
+                    Files.createDirectories(dest);
+                } else {
+                    Files.createDirectories(dest.getParent());
+                    Files.copy(file, dest, StandardCopyOption.REPLACE_EXISTING);
                 }
             }
         } catch (IOException e) {
diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java
index f14e499081ce9..763c1783c2d28 100644
--- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java
+++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java
@@ -146,6 +146,7 @@
 import org.opensearch.index.analysis.PreConfiguredTokenizer;
 import org.opensearch.index.analysis.TokenFilterFactory;
 import org.opensearch.index.analysis.TokenizerFactory;
+import org.opensearch.indices.analysis.AnalysisModule;
 import org.opensearch.indices.analysis.AnalysisModule.AnalysisProvider;
 import org.opensearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
 import org.opensearch.plugins.AnalysisPlugin;
@@ -332,8 +333,6 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
         filters.put("sorani_normalization", SoraniNormalizationFilterFactory::new);
         filters.put("stemmer_override", requiresAnalysisSettings(StemmerOverrideTokenFilterFactory::new));
         filters.put("stemmer", StemmerTokenFilterFactory::new);
-        filters.put("synonym", requiresAnalysisSettings(SynonymTokenFilterFactory::new));
-        filters.put("synonym_graph", requiresAnalysisSettings(SynonymGraphTokenFilterFactory::new));
         filters.put("trim", TrimTokenFilterFactory::new);
         filters.put("truncate", requiresAnalysisSettings(TruncateTokenFilterFactory::new));
         filters.put("unique", UniqueTokenFilterFactory::new);
@@ -343,6 +342,36 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
         return filters;
     }
 
+    @Override
+    public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters(AnalysisModule analysisModule) {
+        Map<String, AnalysisProvider<TokenFilterFactory>> filters = getTokenFilters();
+        filters.put(
+            "synonym",
+            requiresAnalysisSettings(
+                (indexSettings, environment, name, settings) -> new SynonymTokenFilterFactory(
+                    indexSettings,
+                    environment,
+                    name,
+                    settings,
+                    analysisModule.getAnalysisRegistry()
+                )
+            )
+        );
+        filters.put(
+            "synonym_graph",
+            requiresAnalysisSettings(
+                (indexSettings, environment, name, settings) -> new SynonymGraphTokenFilterFactory(
+                    indexSettings,
+                    environment,
+                    name,
+                    settings,
+                    analysisModule.getAnalysisRegistry()
+                )
+            )
+        );
+        return filters;
+    }
+
     @Override
     public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
         Map<String, AnalysisProvider<CharFilterFactory>> filters = new TreeMap<>();
diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymGraphTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymGraphTokenFilterFactory.java
index fed959108c411..c2e20e99473de 100644
--- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymGraphTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymGraphTokenFilterFactory.java
@@ -40,6 +40,7 @@
 import org.opensearch.env.Environment;
 import org.opensearch.index.IndexSettings;
 import org.opensearch.index.analysis.AnalysisMode;
+import org.opensearch.index.analysis.AnalysisRegistry;
 import org.opensearch.index.analysis.CharFilterFactory;
 import org.opensearch.index.analysis.TokenFilterFactory;
 import org.opensearch.index.analysis.TokenizerFactory;
@@ -49,8 +50,14 @@
 
 public class SynonymGraphTokenFilterFactory extends SynonymTokenFilterFactory {
 
-    SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
-        super(indexSettings, env, name, settings);
+    SynonymGraphTokenFilterFactory(
+        IndexSettings indexSettings,
+        Environment env,
+        String name,
+        Settings settings,
+        AnalysisRegistry analysisRegistry
+    ) {
+        super(indexSettings, env, name, settings, analysisRegistry);
     }
 
     @Override
diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java
index 01a65e87d7466..86417a579628c 100644
--- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java
@@ -44,11 +44,13 @@
 import org.opensearch.index.analysis.AbstractTokenFilterFactory;
 import org.opensearch.index.analysis.Analysis;
 import org.opensearch.index.analysis.AnalysisMode;
+import org.opensearch.index.analysis.AnalysisRegistry;
 import org.opensearch.index.analysis.CharFilterFactory;
 import org.opensearch.index.analysis.CustomAnalyzer;
 import org.opensearch.index.analysis.TokenFilterFactory;
 import org.opensearch.index.analysis.TokenizerFactory;
 
+import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
 import java.util.List;
@@ -64,8 +66,16 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
     protected final Settings settings;
     protected final Environment environment;
     protected final AnalysisMode analysisMode;
-
-    SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    private final String synonymAnalyzer;
+    private final AnalysisRegistry analysisRegistry;
+
+    SynonymTokenFilterFactory(
+        IndexSettings indexSettings,
+        Environment env,
+        String name,
+        Settings settings,
+        AnalysisRegistry analysisRegistry
+    ) {
         super(indexSettings, name, settings);
         this.settings = settings;
 
@@ -83,6 +93,8 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
         boolean updateable = settings.getAsBoolean("updateable", false);
         this.analysisMode = updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
         this.environment = env;
+        this.synonymAnalyzer = settings.get("synonym_analyzer", null);
+        this.analysisRegistry = analysisRegistry;
     }
 
     @Override
@@ -137,6 +149,17 @@ Analyzer buildSynonymAnalyzer(
         List<TokenFilterFactory> tokenFilters,
         Function<String, TokenFilterFactory> allFilters
     ) {
+        if (synonymAnalyzer != null) {
+            Analyzer customSynonymAnalyzer;
+            try {
+                customSynonymAnalyzer = analysisRegistry.getAnalyzer(synonymAnalyzer);
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+            if (customSynonymAnalyzer != null) {
+                return customSynonymAnalyzer;
+            }
+        }
         return new CustomAnalyzer(
             tokenizer,
             charFilters.toArray(new CharFilterFactory[0]),
@@ -177,5 +200,4 @@ Reader getRulesFromSettings(Environment env) {
         }
         return rulesReader;
     }
-
 }
diff --git a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/CommonAnalysisFactoryTests.java b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/CommonAnalysisFactoryTests.java
index 7e3140f8bcba3..baad489d33649 100644
--- a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/CommonAnalysisFactoryTests.java
+++ b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/CommonAnalysisFactoryTests.java
@@ -39,12 +39,16 @@
 import org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory;
 import org.apache.lucene.analysis.te.TeluguNormalizationFilterFactory;
 import org.apache.lucene.analysis.te.TeluguStemFilterFactory;
+import org.opensearch.index.analysis.TokenFilterFactory;
 import org.opensearch.indices.analysis.AnalysisFactoryTestCase;
+import org.opensearch.indices.analysis.AnalysisModule;
 
 import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
 
+import org.mockito.Mock;
+
 import static java.util.Collections.emptyList;
 import static java.util.stream.Collectors.toList;
 
@@ -53,6 +57,9 @@ public CommonAnalysisFactoryTests() {
         super(new CommonAnalysisModulePlugin());
     }
 
+    @Mock
+    private AnalysisModule analysisModule;
+
     @Override
     protected Map<String, Class<?>> getTokenizers() {
         Map<String, Class<?>> tokenizers = new TreeMap<>(super.getTokenizers());
@@ -302,4 +309,39 @@ private void markedTestCase(String name, Map<String, Class<?>> map) {
             unmarked
         );
     }
+
+    /**
+     * Tests the getTokenFilters(AnalysisModule) method to verify:
+     * 1. All token filters are properly loaded
+     * 2. Basic filters remain available
+     * 3. Synonym filters are added when AnalysisModule is provided
+     * 4. The total number of filters is correct (base filters + synonym filters)
+     */
+    public void testGetTokenFiltersWithAnalysisModule() {
+        CommonAnalysisModulePlugin plugin = (CommonAnalysisModulePlugin) getAnalysisPlugin();
+        Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> filters = plugin.getTokenFilters(analysisModule);
+        assertNotNull("Token filters should not be null", filters);
+        assertTrue("Should contain basic filters", filters.containsKey("lowercase"));
+        assertTrue("Should contain synonym filter", filters.containsKey("synonym"));
+        assertTrue("Should contain synonym_graph filter", filters.containsKey("synonym_graph"));
+        Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> baseFilters = plugin.getTokenFilters();
+        assertEquals("Should contain additional synonym filters", baseFilters.size() + 2, filters.size());
+    }
+
+    /**
+     * Tests that synonym-related token filters are only available when an AnalysisModule is provided.
+     * This test verifies that:
+     * 1. Base getTokenFilters() does not include synonym filters
+     * 2. Extended getTokenFilters(AnalysisModule) includes synonym filters
+     * 3. Both synonym and synonym_graph filters require AnalysisModule
+     */
+    public void testSynonymFiltersRequireAnalysisModule() {
+        CommonAnalysisModulePlugin plugin = (CommonAnalysisModulePlugin) getAnalysisPlugin();
+        Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> baseFilters = plugin.getTokenFilters();
+        Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> extendedFilters = plugin.getTokenFilters(analysisModule);
+        assertFalse("Base filters should not contain synonym filter", baseFilters.containsKey("synonym"));
+        assertTrue("Extended filters should contain synonym filter", extendedFilters.containsKey("synonym"));
+        assertFalse("Base filters should not contain synonym_graph filter", baseFilters.containsKey("synonym_graph"));
+        assertTrue("Extended filters should contain synonym_graph filter", extendedFilters.containsKey("synonym_graph"));
+    }
 }
diff --git a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/SynonymsAnalysisTests.java b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/SynonymsAnalysisTests.java
index 8c8b8ac7f61c0..e7b378701c166 100644
--- a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/SynonymsAnalysisTests.java
+++ b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/SynonymsAnalysisTests.java
@@ -41,11 +41,14 @@
 import org.opensearch.cluster.metadata.IndexMetadata;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.env.Environment;
+import org.opensearch.env.TestEnvironment;
 import org.opensearch.index.IndexSettings;
+import org.opensearch.index.analysis.AnalysisRegistry;
 import org.opensearch.index.analysis.IndexAnalyzers;
 import org.opensearch.index.analysis.PreConfiguredTokenFilter;
 import org.opensearch.index.analysis.TokenFilterFactory;
 import org.opensearch.index.analysis.TokenizerFactory;
+import org.opensearch.indices.analysis.AnalysisModule;
 import org.opensearch.test.IndexSettingsModule;
 import org.opensearch.test.OpenSearchTestCase;
 import org.opensearch.test.VersionUtils;
@@ -63,6 +66,7 @@
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.instanceOf;
 import static org.hamcrest.Matchers.startsWith;
+import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents;
 
 public class SynonymsAnalysisTests extends OpenSearchTestCase {
     private IndexAnalyzers indexAnalyzers;
@@ -255,14 +259,18 @@ public void testTokenFiltersBypassSynonymAnalysis() throws IOException {
             .put("hyphenation_patterns_path", "foo")
             .build();
         IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
+        Environment environment = TestEnvironment.newEnvironment(settings);
 
+        // Initialize analysis registry
+        AnalysisModule module = new AnalysisModule(environment, Collections.singletonList(new CommonAnalysisModulePlugin()));
+        AnalysisRegistry analysisRegistry = module.getAnalysisRegistry();
         String[] bypassingFactories = new String[] { "dictionary_decompounder" };
 
         CommonAnalysisModulePlugin plugin = new CommonAnalysisModulePlugin();
         for (String factory : bypassingFactories) {
-            TokenFilterFactory tff = plugin.getTokenFilters().get(factory).get(idxSettings, null, factory, settings);
-            TokenizerFactory tok = new KeywordTokenizerFactory(idxSettings, null, "keyword", settings);
-            SynonymTokenFilterFactory stff = new SynonymTokenFilterFactory(idxSettings, null, "synonym", settings);
+            TokenFilterFactory tff = plugin.getTokenFilters().get(factory).get(idxSettings, environment, factory, settings);
+            TokenizerFactory tok = new KeywordTokenizerFactory(idxSettings, environment, "keyword", settings);
+            SynonymTokenFilterFactory stff = new SynonymTokenFilterFactory(idxSettings, environment, "synonym", settings, analysisRegistry);
             Analyzer analyzer = stff.buildSynonymAnalyzer(tok, Collections.emptyList(), Collections.singletonList(tff), null);
 
             try (TokenStream ts = analyzer.tokenStream("field", "text")) {
@@ -319,7 +327,13 @@ public void testDisallowedTokenFilters() throws IOException {
             .putList("common_words", "a", "b")
             .put("output_unigrams", "true")
             .build();
+
+        Environment environment = TestEnvironment.newEnvironment(settings);
         IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
+
+        // Create analysis module
+        AnalysisModule analysisModule = new AnalysisModule(environment, Collections.singletonList(new CommonAnalysisModulePlugin()));
+        AnalysisRegistry analysisRegistry = analysisModule.getAnalysisRegistry();
         CommonAnalysisModulePlugin plugin = new CommonAnalysisModulePlugin();
 
         String[] disallowedFactories = new String[] {
@@ -333,9 +347,9 @@ public void testDisallowedTokenFilters() throws IOException {
             "fingerprint" };
 
         for (String factory : disallowedFactories) {
-            TokenFilterFactory tff = plugin.getTokenFilters().get(factory).get(idxSettings, null, factory, settings);
-            TokenizerFactory tok = new KeywordTokenizerFactory(idxSettings, null, "keyword", settings);
-            SynonymTokenFilterFactory stff = new SynonymTokenFilterFactory(idxSettings, null, "synonym", settings);
+            TokenFilterFactory tff = plugin.getTokenFilters().get(factory).get(idxSettings, environment, factory, settings);
+            TokenizerFactory tok = new KeywordTokenizerFactory(idxSettings, environment, "keyword", settings);
+            SynonymTokenFilterFactory stff = new SynonymTokenFilterFactory(idxSettings, environment, "synonym", settings, analysisRegistry);
 
             IllegalArgumentException e = expectThrows(
                 IllegalArgumentException.class,
@@ -362,4 +376,75 @@ private void match(String analyzerName, String source, String target) throws IOE
         MatcherAssert.assertThat(target, equalTo(sb.toString().trim()));
     }
 
+    /**
+     * Tests the integration of word delimiter and synonym graph filters with synonym_analyzer based on issue #16263.
+     * This test verifies the correct handling of:
+     * 1. Hyphenated words with word delimiter (e.g., "note-book" → ["notebook", "note", "book"])
+     * 2. Multi-word synonyms (e.g., "mobile phone" → ["smartphone"])
+     * 3. Single word synonyms (e.g., "laptop" → ["notebook"])
+     *
+     * @see <a href="https://github.com/opensearch-project/OpenSearch/issues/16263">Issue #16263</a>
+     */
+    public void testSynonymAnalyzerWithWordDelimiter() throws IOException {
+        Settings settings = Settings.builder()
+            .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
+            .put("path.home", createTempDir().toString())
+            .put("index.analysis.filter.custom_word_delimiter.type", "word_delimiter_graph")
+            .put("index.analysis.filter.custom_word_delimiter.generate_word_parts", true)
+            .put("index.analysis.filter.custom_word_delimiter.catenate_all", true)
+            .put("index.analysis.filter.custom_word_delimiter.split_on_numerics", false)
+            .put("index.analysis.filter.custom_word_delimiter.split_on_case_change", false)
+            .put("index.analysis.filter.custom_pattern_replace_filter.type", "pattern_replace")
+            .put("index.analysis.filter.custom_pattern_replace_filter.pattern", "(-)")
+            .put("index.analysis.filter.custom_pattern_replace_filter.replacement", " ")
+            .put("index.analysis.filter.custom_pattern_replace_filter.all", true)
+            .put("index.analysis.filter.custom_synonym_graph_filter.type", "synonym_graph")
+            .putList(
+                "index.analysis.filter.custom_synonym_graph_filter.synonyms",
+                "laptop => notebook",
+                "smartphone, mobile phone, cell phone => smartphone",
+                "tv, television => television"
+            )
+            .put("index.analysis.filter.custom_synonym_graph_filter.synonym_analyzer", "standard")
+            .put("index.analysis.analyzer.text_en_index.type", "custom")
+            .put("index.analysis.analyzer.text_en_index.tokenizer", "whitespace")
+            .putList(
+                "index.analysis.analyzer.text_en_index.filter",
+                "lowercase",
+                "custom_word_delimiter",
+                "custom_synonym_graph_filter",
+                "custom_pattern_replace_filter",
+                "flatten_graph"
+            )
+            .build();
+        Environment environment = TestEnvironment.newEnvironment(settings);
+        IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
+        AnalysisModule module = new AnalysisModule(environment, Collections.singletonList(new CommonAnalysisModulePlugin()));
+        IndexAnalyzers analyzers = module.getAnalysisRegistry().build(indexSettings);
+        try (TokenStream ts = analyzers.get("text_en_index").tokenStream("", "note-book")) {
+            assertTokenStreamContents(
+                ts,
+                new String[] { "notebook", "note", "book" },
+                new int[] { 0, 0, 5 },
+                new int[] { 9, 4, 9 },
+                new String[] { "word", "word", "word" },
+                new int[] { 1, 0, 1 },
+                new int[] { 2, 1, 1 }
+            );
+        }
+        try (TokenStream ts = analyzers.get("text_en_index").tokenStream("", "mobile phone")) {
+            assertTokenStreamContents(
+                ts,
+                new String[] { "smartphone" },
+                new int[] { 0 },
+                new int[] { 12 },
+                new String[] { "SYNONYM" },
+                new int[] { 1 },
+                new int[] { 1 }
+            );
+        }
+        try (TokenStream ts = analyzers.get("text_en_index").tokenStream("", "laptop")) {
+            assertTokenStreamContents(ts, new String[] { "notebook" }, new int[] { 0 }, new int[] { 6 });
+        }
+    }
 }
diff --git a/server/src/main/java/org/opensearch/indices/analysis/AnalysisModule.java b/server/src/main/java/org/opensearch/indices/analysis/AnalysisModule.java
index 0926d497087d1..dbb3035a18f74 100644
--- a/server/src/main/java/org/opensearch/indices/analysis/AnalysisModule.java
+++ b/server/src/main/java/org/opensearch/indices/analysis/AnalysisModule.java
@@ -165,7 +165,12 @@ public boolean requiresAnalysisSettings() {
             )
         );
 
-        tokenFilters.extractAndRegister(plugins, AnalysisPlugin::getTokenFilters);
+        for (AnalysisPlugin plugin : plugins) {
+            Map<String, AnalysisProvider<TokenFilterFactory>> filters = plugin.getTokenFilters(this);
+            for (Map.Entry<String, AnalysisProvider<TokenFilterFactory>> entry : filters.entrySet()) {
+                tokenFilters.register(entry.getKey(), entry.getValue());
+            }
+        }
         return tokenFilters;
     }
 
diff --git a/server/src/main/java/org/opensearch/plugins/AnalysisPlugin.java b/server/src/main/java/org/opensearch/plugins/AnalysisPlugin.java
index 53dcc916b244f..a7c4604a30553 100644
--- a/server/src/main/java/org/opensearch/plugins/AnalysisPlugin.java
+++ b/server/src/main/java/org/opensearch/plugins/AnalysisPlugin.java
@@ -47,6 +47,7 @@
 import org.opensearch.index.analysis.PreConfiguredTokenizer;
 import org.opensearch.index.analysis.TokenFilterFactory;
 import org.opensearch.index.analysis.TokenizerFactory;
+import org.opensearch.indices.analysis.AnalysisModule;
 import org.opensearch.indices.analysis.AnalysisModule.AnalysisProvider;
 
 import java.io.IOException;
@@ -84,6 +85,14 @@ default Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
         return emptyMap();
     }
 
+    /**
+     * Override to add additional {@link TokenFilter}s that need access to the AnalysisModule.
+     * The default implementation calls the existing getTokenFilters() method for backward compatibility.
+     */
+    default Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters(AnalysisModule analysisModule) {
+        return getTokenFilters();
+    }
+
     /**
      * Override to add additional {@link TokenFilter}s. See {@link #requiresAnalysisSettings(AnalysisProvider)}
      * how to on get the configuration from the index.
diff --git a/server/src/test/java/org/opensearch/indices/analysis/AnalysisModuleTests.java b/server/src/test/java/org/opensearch/indices/analysis/AnalysisModuleTests.java
index c9e26d6d6159a..625f79b26b01b 100644
--- a/server/src/test/java/org/opensearch/indices/analysis/AnalysisModuleTests.java
+++ b/server/src/test/java/org/opensearch/indices/analysis/AnalysisModuleTests.java
@@ -56,6 +56,8 @@
 import org.opensearch.index.analysis.CustomAnalyzer;
 import org.opensearch.index.analysis.IndexAnalyzers;
 import org.opensearch.index.analysis.MyFilterTokenFilterFactory;
+import org.opensearch.index.analysis.NameOrDefinition;
+import org.opensearch.index.analysis.NamedAnalyzer;
 import org.opensearch.index.analysis.PreConfiguredCharFilter;
 import org.opensearch.index.analysis.PreConfiguredTokenFilter;
 import org.opensearch.index.analysis.PreConfiguredTokenizer;
@@ -80,6 +82,7 @@
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -521,4 +524,57 @@ public boolean incrementToken() throws IOException {
         }
     }
 
+    /**
+     * Tests registration and functionality of token filters that require access to the AnalysisModule.
+     * This test verifies:
+     * 1. Token filter registration using the extended getTokenFilters(AnalysisModule) method
+     * 2. Filter functionality in both predefined and custom analyzers
+     * 3. Proper access to AnalysisModule reference within filter factory creation
+     */
+    public void testTokenFilterRegistrationWithModuleReference() throws IOException {
+        class TestPlugin implements AnalysisPlugin {
+            @Override
+            public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters(AnalysisModule module) {
+                return Map.of(
+                    "test_filter",
+                    (indexSettings, env, name, settings) -> AppendTokenFilter.factoryForSuffix("_" + module.hashCode())
+                );
+            }
+        }
+        Settings settings = Settings.builder()
+            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+            .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
+            .put("index.analysis.analyzer.my_analyzer.tokenizer", "standard")
+            .put("index.analysis.analyzer.my_analyzer.filter", "test_filter")
+            .build();
+        Environment environment = TestEnvironment.newEnvironment(settings);
+        AnalysisModule module = new AnalysisModule(environment, singletonList(new TestPlugin()));
+        AnalysisRegistry registry = module.getAnalysisRegistry();
+        IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", Settings.builder().put(settings).build());
+        Map<String, TokenFilterFactory> tokenFilterFactories = registry.buildTokenFilterFactories(indexSettings);
+        assertTrue("Token filter 'test_filter' should be registered", tokenFilterFactories.containsKey("test_filter"));
+        IndexAnalyzers analyzers = registry.build(indexSettings);
+        String testText = "test";
+        TokenStream tokenStream = analyzers.get("my_analyzer").tokenStream("", testText);
+        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
+        tokenStream.reset();
+        assertTrue("Should have found a token", tokenStream.incrementToken());
+        assertEquals("Token should have expected suffix", "test_" + module.hashCode(), charTermAttribute.toString());
+        assertFalse("Should not have additional tokens", tokenStream.incrementToken());
+        tokenStream.close();
+        NamedAnalyzer customAnalyzer = registry.buildCustomAnalyzer(
+            indexSettings,
+            false,
+            new NameOrDefinition("standard"),
+            Collections.emptyList(),
+            Collections.singletonList(new NameOrDefinition("test_filter"))
+        );
+        tokenStream = customAnalyzer.tokenStream("", testText);
+        charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
+        tokenStream.reset();
+        assertTrue("Custom analyzer should produce a token", tokenStream.incrementToken());
+        assertEquals("Custom analyzer token should have expected suffix", "test_" + module.hashCode(), charTermAttribute.toString());
+        assertFalse("Custom analyzer should not produce additional tokens", tokenStream.incrementToken());
+        tokenStream.close();
+    }
 }
diff --git a/test/framework/src/main/java/org/opensearch/indices/analysis/AnalysisFactoryTestCase.java b/test/framework/src/main/java/org/opensearch/indices/analysis/AnalysisFactoryTestCase.java
index 23cf4d47a49d9..ca23f67215f3d 100644
--- a/test/framework/src/main/java/org/opensearch/indices/analysis/AnalysisFactoryTestCase.java
+++ b/test/framework/src/main/java/org/opensearch/indices/analysis/AnalysisFactoryTestCase.java
@@ -248,6 +248,17 @@ public AnalysisFactoryTestCase(AnalysisPlugin plugin) {
         this.plugin = Objects.requireNonNull(plugin, "plugin is required. use an empty plugin for core");
     }
 
+    /**
+     * Returns the AnalysisPlugin instance that was passed to this test case.
+     * This protected method allows subclasses to access the plugin for testing
+     * specific analysis components.
+     *
+     * @return The AnalysisPlugin instance used by this test case
+     */
+    protected AnalysisPlugin getAnalysisPlugin() {
+        return plugin;
+    }
+
     protected Map<String, Class<?>> getCharFilters() {
         return KNOWN_CHARFILTERS;
     }

From 7d6fc4398907c5a172c7f7fe6b70a685a5dc6c26 Mon Sep 17 00:00:00 2001
From: Prudhvi Godithi <pgodithi@amazon.com>
Date: Sat, 2 Nov 2024 12:46:07 -0700
Subject: [PATCH 2/2] synonym_analyzer configuration setting

Signed-off-by: Prudhvi Godithi <pgodithi@amazon.com>
---
 CHANGELOG.md                                  |  1 +
 .../gradle/testclusters/OpenSearchNode.java   | 12 ++++------
 .../common/CommonAnalysisModulePlugin.java    |  8 +------
 .../common/SynonymTokenFilterFactory.java     |  8 +++----
 .../common/CommonAnalysisFactoryTests.java    | 22 +------------------
 .../common/SynonymsAnalysisTests.java         | 12 ++++------
 .../opensearch/plugins/AnalysisPlugin.java    |  2 +-
 .../indices/analysis/AnalysisModuleTests.java |  5 +----
 8 files changed, 17 insertions(+), 53 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index edbf7c8ed065c..c875f440ad7f8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Add Setting to adjust the primary constraint weights ([#16471](https://github.com/opensearch-project/OpenSearch/pull/16471))
 - Switch from `buildSrc/version.properties` to Gradle version catalog (`gradle/libs.versions.toml`) to enable dependabot to perform automated upgrades on common libs ([#16284](https://github.com/opensearch-project/OpenSearch/pull/16284))
 - Add dynamic setting allowing size > 0 requests to be cached in the request cache ([#16483](https://github.com/opensearch-project/OpenSearch/pull/16483/files))
+- Add new configuration setting `synonym_analyzer`, to the `synonym` and `synonym_graph` filters, enabling the specification of a custom analyzer for reading the synonym file ([#16488](https://github.com/opensearch-project/OpenSearch/pull/16488)).
 
 ### Dependencies
 - Bump `com.azure:azure-storage-common` from 12.25.1 to 12.27.1 ([#16521](https://github.com/opensearch-project/OpenSearch/pull/16521))
diff --git a/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java b/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java
index bb409c2afd871..cd22560af9a96 100644
--- a/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java
+++ b/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java
@@ -1216,18 +1216,14 @@ private void createConfiguration() {
             );
 
             final List<Path> configFiles;
-            try (Stream<Path> stream = Files.walk(getDistroDir().resolve("config"))) {
+            try (Stream<Path> stream = Files.list(getDistroDir().resolve("config"))) {
                 configFiles = stream.collect(Collectors.toList());
             }
             logToProcessStdout("Copying additional config files from distro " + configFiles);
             for (Path file : configFiles) {
-                Path relativePath = getDistroDir().resolve("config").relativize(file);
-                Path dest = configFile.getParent().resolve(relativePath);
-                if (Files.isDirectory(file)) {
-                    Files.createDirectories(dest);
-                } else {
-                    Files.createDirectories(dest.getParent());
-                    Files.copy(file, dest, StandardCopyOption.REPLACE_EXISTING);
+                Path dest = configFile.getParent().resolve(file.getFileName());
+                if (Files.exists(dest) == false) {
+                    Files.copy(file, dest);
                 }
             }
         } catch (IOException e) {
diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java
index 763c1783c2d28..7f9437972a358 100644
--- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java
+++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java
@@ -248,7 +248,7 @@ public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAn
     }
 
     @Override
-    public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
+    public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters(AnalysisModule analysisModule) {
         Map<String, AnalysisProvider<TokenFilterFactory>> filters = new TreeMap<>();
         filters.put("apostrophe", ApostropheFilterFactory::new);
         filters.put("arabic_normalization", ArabicNormalizationFilterFactory::new);
@@ -339,12 +339,6 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
         filters.put("uppercase", UpperCaseTokenFilterFactory::new);
         filters.put("word_delimiter_graph", WordDelimiterGraphTokenFilterFactory::new);
         filters.put("word_delimiter", WordDelimiterTokenFilterFactory::new);
-        return filters;
-    }
-
-    @Override
-    public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters(AnalysisModule analysisModule) {
-        Map<String, AnalysisProvider<TokenFilterFactory>> filters = getTokenFilters();
         filters.put(
             "synonym",
             requiresAnalysisSettings(
diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java
index 86417a579628c..1cd78170e66c8 100644
--- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java
@@ -66,7 +66,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
     protected final Settings settings;
     protected final Environment environment;
     protected final AnalysisMode analysisMode;
-    private final String synonymAnalyzer;
+    private final String synonymAnalyzerName;
     private final AnalysisRegistry analysisRegistry;
 
     SynonymTokenFilterFactory(
@@ -93,7 +93,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
         boolean updateable = settings.getAsBoolean("updateable", false);
         this.analysisMode = updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
         this.environment = env;
-        this.synonymAnalyzer = settings.get("synonym_analyzer", null);
+        this.synonymAnalyzerName = settings.get("synonym_analyzer", null);
         this.analysisRegistry = analysisRegistry;
     }
 
@@ -149,10 +149,10 @@ Analyzer buildSynonymAnalyzer(
         List<TokenFilterFactory> tokenFilters,
         Function<String, TokenFilterFactory> allFilters
     ) {
-        if (synonymAnalyzer != null) {
+        if (synonymAnalyzerName != null) {
             Analyzer customSynonymAnalyzer;
             try {
-                customSynonymAnalyzer = analysisRegistry.getAnalyzer(synonymAnalyzer);
+                customSynonymAnalyzer = analysisRegistry.getAnalyzer(synonymAnalyzerName);
             } catch (IOException e) {
                 throw new RuntimeException(e);
             }
diff --git a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/CommonAnalysisFactoryTests.java b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/CommonAnalysisFactoryTests.java
index baad489d33649..1f4faf53dced5 100644
--- a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/CommonAnalysisFactoryTests.java
+++ b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/CommonAnalysisFactoryTests.java
@@ -314,8 +314,7 @@ private void markedTestCase(String name, Map<String, Class<?>> map) {
      * Tests the getTokenFilters(AnalysisModule) method to verify:
      * 1. All token filters are properly loaded
      * 2. Basic filters remain available
-     * 3. Synonym filters are added when AnalysisModule is provided
-     * 4. The total number of filters is correct (base filters + synonym filters)
+     * 3. Synonym filters remain available when AnalysisModule is provided
      */
     public void testGetTokenFiltersWithAnalysisModule() {
         CommonAnalysisModulePlugin plugin = (CommonAnalysisModulePlugin) getAnalysisPlugin();
@@ -324,24 +323,5 @@ public void testGetTokenFiltersWithAnalysisModule() {
         assertTrue("Should contain basic filters", filters.containsKey("lowercase"));
         assertTrue("Should contain synonym filter", filters.containsKey("synonym"));
         assertTrue("Should contain synonym_graph filter", filters.containsKey("synonym_graph"));
-        Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> baseFilters = plugin.getTokenFilters();
-        assertEquals("Should contain additional synonym filters", baseFilters.size() + 2, filters.size());
-    }
-
-    /**
-     * Tests that synonym-related token filters are only available when an AnalysisModule is provided.
-     * This test verifies that:
-     * 1. Base getTokenFilters() does not include synonym filters
-     * 2. Extended getTokenFilters(AnalysisModule) includes synonym filters
-     * 3. Both synonym and synonym_graph filters require AnalysisModule
-     */
-    public void testSynonymFiltersRequireAnalysisModule() {
-        CommonAnalysisModulePlugin plugin = (CommonAnalysisModulePlugin) getAnalysisPlugin();
-        Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> baseFilters = plugin.getTokenFilters();
-        Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> extendedFilters = plugin.getTokenFilters(analysisModule);
-        assertFalse("Base filters should not contain synonym filter", baseFilters.containsKey("synonym"));
-        assertTrue("Extended filters should contain synonym filter", extendedFilters.containsKey("synonym"));
-        assertFalse("Base filters should not contain synonym_graph filter", baseFilters.containsKey("synonym_graph"));
-        assertTrue("Extended filters should contain synonym_graph filter", extendedFilters.containsKey("synonym_graph"));
     }
 }
diff --git a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/SynonymsAnalysisTests.java b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/SynonymsAnalysisTests.java
index e7b378701c166..33d92e01a85b1 100644
--- a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/SynonymsAnalysisTests.java
+++ b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/SynonymsAnalysisTests.java
@@ -260,15 +260,13 @@ public void testTokenFiltersBypassSynonymAnalysis() throws IOException {
             .build();
         IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
         Environment environment = TestEnvironment.newEnvironment(settings);
-
-        // Initialize analysis registry
-        AnalysisModule module = new AnalysisModule(environment, Collections.singletonList(new CommonAnalysisModulePlugin()));
-        AnalysisRegistry analysisRegistry = module.getAnalysisRegistry();
+        AnalysisModule analysisModule = new AnalysisModule(environment, Collections.singletonList(new CommonAnalysisModulePlugin()));
+        AnalysisRegistry analysisRegistry = analysisModule.getAnalysisRegistry();
         String[] bypassingFactories = new String[] { "dictionary_decompounder" };
 
         CommonAnalysisModulePlugin plugin = new CommonAnalysisModulePlugin();
         for (String factory : bypassingFactories) {
-            TokenFilterFactory tff = plugin.getTokenFilters().get(factory).get(idxSettings, environment, factory, settings);
+            TokenFilterFactory tff = plugin.getTokenFilters(analysisModule).get(factory).get(idxSettings, environment, factory, settings);
             TokenizerFactory tok = new KeywordTokenizerFactory(idxSettings, environment, "keyword", settings);
             SynonymTokenFilterFactory stff = new SynonymTokenFilterFactory(idxSettings, environment, "synonym", settings, analysisRegistry);
             Analyzer analyzer = stff.buildSynonymAnalyzer(tok, Collections.emptyList(), Collections.singletonList(tff), null);
@@ -330,8 +328,6 @@ public void testDisallowedTokenFilters() throws IOException {
 
         Environment environment = TestEnvironment.newEnvironment(settings);
         IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
-
-        // Create analysis module
         AnalysisModule analysisModule = new AnalysisModule(environment, Collections.singletonList(new CommonAnalysisModulePlugin()));
         AnalysisRegistry analysisRegistry = analysisModule.getAnalysisRegistry();
         CommonAnalysisModulePlugin plugin = new CommonAnalysisModulePlugin();
@@ -347,7 +343,7 @@ public void testDisallowedTokenFilters() throws IOException {
             "fingerprint" };
 
         for (String factory : disallowedFactories) {
-            TokenFilterFactory tff = plugin.getTokenFilters().get(factory).get(idxSettings, environment, factory, settings);
+            TokenFilterFactory tff = plugin.getTokenFilters(analysisModule).get(factory).get(idxSettings, environment, factory, settings);
             TokenizerFactory tok = new KeywordTokenizerFactory(idxSettings, environment, "keyword", settings);
             SynonymTokenFilterFactory stff = new SynonymTokenFilterFactory(idxSettings, environment, "synonym", settings, analysisRegistry);
 
diff --git a/server/src/main/java/org/opensearch/plugins/AnalysisPlugin.java b/server/src/main/java/org/opensearch/plugins/AnalysisPlugin.java
index a7c4604a30553..58e43633777c9 100644
--- a/server/src/main/java/org/opensearch/plugins/AnalysisPlugin.java
+++ b/server/src/main/java/org/opensearch/plugins/AnalysisPlugin.java
@@ -87,7 +87,7 @@ default Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
 
     /**
      * Override to add additional {@link TokenFilter}s that need access to the AnalysisModule.
-     * The default implementation calls the existing getTokenFilters() method for backward compatibility.
+     * The default implementation for plugins that don't need AnalysisModule calls the existing getTokenFilters() method.
      */
     default Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters(AnalysisModule analysisModule) {
         return getTokenFilters();
diff --git a/server/src/test/java/org/opensearch/indices/analysis/AnalysisModuleTests.java b/server/src/test/java/org/opensearch/indices/analysis/AnalysisModuleTests.java
index 625f79b26b01b..74bc987c44b15 100644
--- a/server/src/test/java/org/opensearch/indices/analysis/AnalysisModuleTests.java
+++ b/server/src/test/java/org/opensearch/indices/analysis/AnalysisModuleTests.java
@@ -526,10 +526,7 @@ public boolean incrementToken() throws IOException {
 
     /**
      * Tests registration and functionality of token filters that require access to the AnalysisModule.
-     * This test verifies:
-     * 1. Token filter registration using the extended getTokenFilters(AnalysisModule) method
-     * 2. Filter functionality in both predefined and custom analyzers
-     * 3. Proper access to AnalysisModule reference within filter factory creation
+     * This test verifies the token filter registration using the extended getTokenFilters(AnalysisModule) method
      */
     public void testTokenFilterRegistrationWithModuleReference() throws IOException {
         class TestPlugin implements AnalysisPlugin {