Skip to content

Commit

Permalink
Add custom synonym_analyzer
Browse files Browse the repository at this point in the history
Signed-off-by: Prudhvi Godithi <[email protected]>
  • Loading branch information
prudhvigodithi committed Oct 30, 2024
1 parent 6f1b59e commit 7a5c00e
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1216,14 +1216,18 @@ private void createConfiguration() {
);

final List<Path> configFiles;
try (Stream<Path> stream = Files.list(getDistroDir().resolve("config"))) {
try (Stream<Path> stream = Files.walk(getDistroDir().resolve("config"))) {
configFiles = stream.collect(Collectors.toList());
}
logToProcessStdout("Copying additional config files from distro " + configFiles);
for (Path file : configFiles) {
Path dest = configFile.getParent().resolve(file.getFileName());
if (Files.exists(dest) == false) {
Files.copy(file, dest);
Path relativePath = getDistroDir().resolve("config").relativize(file);
Path dest = configFile.getParent().resolve(relativePath);
if (Files.isDirectory(file)) {
Files.createDirectories(dest);
} else {
Files.createDirectories(dest.getParent());
Files.copy(file, dest, StandardCopyOption.REPLACE_EXISTING);
}
}
} catch (IOException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@
import org.opensearch.index.analysis.PreConfiguredTokenizer;
import org.opensearch.index.analysis.TokenFilterFactory;
import org.opensearch.index.analysis.TokenizerFactory;
import org.opensearch.indices.analysis.AnalysisModule;
import org.opensearch.indices.analysis.AnalysisModule.AnalysisProvider;
import org.opensearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
import org.opensearch.plugins.AnalysisPlugin;
Expand All @@ -157,9 +158,11 @@
import org.opensearch.threadpool.ThreadPool;
import org.opensearch.watcher.ResourceWatcherService;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
Expand All @@ -176,6 +179,7 @@ public class CommonAnalysisModulePlugin extends Plugin implements AnalysisPlugin

private final SetOnce<ScriptService> scriptService = new SetOnce<>();


@Override
public Collection<Object> createComponents(
Client client,
Expand All @@ -194,6 +198,7 @@ public Collection<Object> createComponents(
return Collections.emptyList();
}


@Override
public List<ScriptContext<?>> getContexts() {
return Collections.singletonList(AnalysisPredicateScript.CONTEXT);
Expand Down Expand Up @@ -332,8 +337,6 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
filters.put("sorani_normalization", SoraniNormalizationFilterFactory::new);
filters.put("stemmer_override", requiresAnalysisSettings(StemmerOverrideTokenFilterFactory::new));
filters.put("stemmer", StemmerTokenFilterFactory::new);
filters.put("synonym", requiresAnalysisSettings(SynonymTokenFilterFactory::new));
filters.put("synonym_graph", requiresAnalysisSettings(SynonymGraphTokenFilterFactory::new));
filters.put("trim", TrimTokenFilterFactory::new);
filters.put("truncate", requiresAnalysisSettings(TruncateTokenFilterFactory::new));
filters.put("unique", UniqueTokenFilterFactory::new);
Expand All @@ -343,6 +346,18 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
return filters;
}

/**
 * Returns the token filters of this plugin, including the synonym filters that need the analysis registry.
 * <p>
 * Starts from the map produced by {@link #getTokenFilters()} and adds the {@code synonym} and
 * {@code synonym_graph} entries. The registry is read from {@code analysisModule} inside each provider,
 * i.e. when a factory is actually created rather than when this method is called.
 *
 * @param analysisModule module whose {@code getAnalysisRegistry()} result is handed to the synonym factories
 * @return the map returned by {@link #getTokenFilters()} with the two synonym entries added
 */
@Override
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters(AnalysisModule analysisModule) {
    // NOTE(review): relies on getTokenFilters() handing back a mutable map — confirm against its implementation.
    final Map<String, AnalysisProvider<TokenFilterFactory>> registered = getTokenFilters();

    final AnalysisProvider<TokenFilterFactory> synonymProvider = (idxSettings, env, filterName, filterSettings) ->
        new SynonymTokenFilterFactory(idxSettings, env, filterName, filterSettings, analysisModule.getAnalysisRegistry());
    registered.put("synonym", requiresAnalysisSettings(synonymProvider));

    final AnalysisProvider<TokenFilterFactory> synonymGraphProvider = (idxSettings, env, filterName, filterSettings) ->
        new SynonymGraphTokenFilterFactory(idxSettings, env, filterName, filterSettings, analysisModule.getAnalysisRegistry());
    registered.put("synonym_graph", requiresAnalysisSettings(synonymGraphProvider));

    return registered;
}

@Override
public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
Map<String, AnalysisProvider<CharFilterFactory>> filters = new TreeMap<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.analysis.AnalysisMode;
import org.opensearch.index.analysis.AnalysisRegistry;
import org.opensearch.index.analysis.CharFilterFactory;
import org.opensearch.index.analysis.TokenFilterFactory;
import org.opensearch.index.analysis.TokenizerFactory;
Expand All @@ -49,8 +50,8 @@

public class SynonymGraphTokenFilterFactory extends SynonymTokenFilterFactory {

SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, env, name, settings);
SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings, AnalysisRegistry analysisRegistry) {
super(indexSettings, env, name, settings, analysisRegistry);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@
import org.opensearch.index.analysis.CustomAnalyzer;
import org.opensearch.index.analysis.TokenFilterFactory;
import org.opensearch.index.analysis.TokenizerFactory;
import org.opensearch.index.analysis.AnalysisRegistry;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.List;
Expand All @@ -64,8 +66,10 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
protected final Settings settings;
protected final Environment environment;
protected final AnalysisMode analysisMode;
private final String synonymAnalyzer;
private final AnalysisRegistry analysisRegistry;

SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings, AnalysisRegistry analysisRegistry) {
super(indexSettings, name, settings);
this.settings = settings;

Expand All @@ -83,6 +87,8 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
boolean updateable = settings.getAsBoolean("updateable", false);
this.analysisMode = updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
this.environment = env;
this.synonymAnalyzer = settings.get("synonym_analyzer", null);
this.analysisRegistry = analysisRegistry;
}

@Override
Expand Down Expand Up @@ -137,6 +143,17 @@ Analyzer buildSynonymAnalyzer(
List<TokenFilterFactory> tokenFilters,
Function<String, TokenFilterFactory> allFilters
) {
if (synonymAnalyzer != null) {
Analyzer customSynonymAnalyzer;
try {
customSynonymAnalyzer = analysisRegistry.getAnalyzer(synonymAnalyzer);
} catch (IOException e) {
throw new RuntimeException(e);
}
if (customSynonymAnalyzer != null) {
return customSynonymAnalyzer;
}
}
return new CustomAnalyzer(
tokenizer,
charFilters.toArray(new CharFilterFactory[0]),
Expand Down Expand Up @@ -177,5 +194,4 @@ Reader getRulesFromSettings(Environment env) {
}
return rulesReader;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,12 @@ public boolean requiresAnalysisSettings() {
)
);

tokenFilters.extractAndRegister(plugins, AnalysisPlugin::getTokenFilters);
for (AnalysisPlugin plugin : plugins) {
Map<String, AnalysisProvider<TokenFilterFactory>> filters = plugin.getTokenFilters(this);
for (Map.Entry<String, AnalysisProvider<TokenFilterFactory>> entry : filters.entrySet()) {
tokenFilters.register(entry.getKey(), entry.getValue());
}
}
return tokenFilters;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import org.opensearch.index.analysis.PreConfiguredTokenizer;
import org.opensearch.index.analysis.TokenFilterFactory;
import org.opensearch.index.analysis.TokenizerFactory;
import org.opensearch.indices.analysis.AnalysisModule;
import org.opensearch.indices.analysis.AnalysisModule.AnalysisProvider;

import java.io.IOException;
Expand Down Expand Up @@ -84,6 +85,14 @@ default Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
return emptyMap();
}

/**
 * Override to add additional {@link TokenFilter}s whose factories need access to the {@link AnalysisModule}.
 * <p>
 * This default implementation ignores the module and returns whatever {@link #getTokenFilters()} returns,
 * so plugins that only override the no-argument method keep working unchanged.
 *
 * @param analysisModule the analysis module made available to the plugin; unused by this default implementation
 * @return the token filter providers of this plugin, keyed by filter name
 */
default Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters(AnalysisModule analysisModule) {
    final Map<String, AnalysisProvider<TokenFilterFactory>> moduleAgnosticFilters = getTokenFilters();
    return moduleAgnosticFilters;
}

/**
* Override to add additional {@link TokenFilter}s. See {@link #requiresAnalysisSettings(AnalysisProvider)}
* for how to get the configuration from the index.
Expand Down

0 comments on commit 7a5c00e

Please sign in to comment.