Skip to content

Commit

Permalink
Add custom synonym_analyzer
Browse files Browse the repository at this point in the history
Signed-off-by: Prudhvi Godithi <[email protected]>
  • Loading branch information
prudhvigodithi committed Oct 30, 2024
1 parent 6f1b59e commit 550756f
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1216,14 +1216,18 @@ private void createConfiguration() {
);

final List<Path> configFiles;
try (Stream<Path> stream = Files.list(getDistroDir().resolve("config"))) {
try (Stream<Path> stream = Files.walk(getDistroDir().resolve("config"))) {
configFiles = stream.collect(Collectors.toList());
}
logToProcessStdout("Copying additional config files from distro " + configFiles);
for (Path file : configFiles) {
Path dest = configFile.getParent().resolve(file.getFileName());
if (Files.exists(dest) == false) {
Files.copy(file, dest);
Path relativePath = getDistroDir().resolve("config").relativize(file);
Path dest = configFile.getParent().resolve(relativePath);
if (Files.isDirectory(file)) {
Files.createDirectories(dest);
} else {
Files.createDirectories(dest.getParent());
Files.copy(file, dest, StandardCopyOption.REPLACE_EXISTING);
}
}
} catch (IOException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@
import org.opensearch.index.analysis.PreConfiguredTokenizer;
import org.opensearch.index.analysis.TokenFilterFactory;
import org.opensearch.index.analysis.TokenizerFactory;
import org.opensearch.indices.analysis.AnalysisModule;
import org.opensearch.indices.analysis.AnalysisModule.AnalysisProvider;
import org.opensearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
import org.opensearch.plugins.AnalysisPlugin;
Expand All @@ -157,6 +158,7 @@
import org.opensearch.threadpool.ThreadPool;
import org.opensearch.watcher.ResourceWatcherService;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
Expand All @@ -176,6 +178,9 @@ public class CommonAnalysisModulePlugin extends Plugin implements AnalysisPlugin

private final SetOnce<ScriptService> scriptService = new SetOnce<>();

private AnalysisModule analysisModule;


@Override
public Collection<Object> createComponents(
Client client,
Expand All @@ -191,9 +196,16 @@ public Collection<Object> createComponents(
Supplier<RepositoriesService> repositoriesServiceSupplier
) {
this.scriptService.set(scriptService);
try {
this.analysisModule = new AnalysisModule(environment,
List.of(this));
} catch (IOException e) {
throw new RuntimeException(e);
}
return Collections.emptyList();
}


@Override
public List<ScriptContext<?>> getContexts() {
return Collections.singletonList(AnalysisPredicateScript.CONTEXT);
Expand Down Expand Up @@ -332,8 +344,13 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
filters.put("sorani_normalization", SoraniNormalizationFilterFactory::new);
filters.put("stemmer_override", requiresAnalysisSettings(StemmerOverrideTokenFilterFactory::new));
filters.put("stemmer", StemmerTokenFilterFactory::new);
filters.put("synonym", requiresAnalysisSettings(SynonymTokenFilterFactory::new));
filters.put("synonym_graph", requiresAnalysisSettings(SynonymGraphTokenFilterFactory::new));
filters.put("synonym", (indexSettings, environment, name, settings) ->
new SynonymTokenFilterFactory(indexSettings, environment, name, settings, analysisModule.getAnalysisRegistry())
);
filters.put("synonym_graph", (indexSettings, environment, name, settings) ->
new SynonymGraphTokenFilterFactory(indexSettings, environment, name, settings, analysisModule.getAnalysisRegistry())
);

filters.put("trim", TrimTokenFilterFactory::new);
filters.put("truncate", requiresAnalysisSettings(TruncateTokenFilterFactory::new));
filters.put("unique", UniqueTokenFilterFactory::new);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.analysis.AnalysisMode;
import org.opensearch.index.analysis.AnalysisRegistry;
import org.opensearch.index.analysis.CharFilterFactory;
import org.opensearch.index.analysis.TokenFilterFactory;
import org.opensearch.index.analysis.TokenizerFactory;
Expand All @@ -49,8 +50,8 @@

public class SynonymGraphTokenFilterFactory extends SynonymTokenFilterFactory {

SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, env, name, settings);
SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings, AnalysisRegistry analysisRegistry) {
super(indexSettings, env, name, settings, analysisRegistry);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@
import org.opensearch.index.analysis.CustomAnalyzer;
import org.opensearch.index.analysis.TokenFilterFactory;
import org.opensearch.index.analysis.TokenizerFactory;
import org.opensearch.index.analysis.AnalysisRegistry;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.List;
Expand All @@ -64,8 +66,10 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
protected final Settings settings;
protected final Environment environment;
protected final AnalysisMode analysisMode;
private final String synonymAnalyzer;
private final AnalysisRegistry analysisRegistry;

SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings, AnalysisRegistry analysisRegistry) {
super(indexSettings, name, settings);
this.settings = settings;

Expand All @@ -83,6 +87,8 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
boolean updateable = settings.getAsBoolean("updateable", false);
this.analysisMode = updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
this.environment = env;
this.synonymAnalyzer = settings.get("synonym_analyzer", null);
this.analysisRegistry = analysisRegistry;
}

@Override
Expand Down Expand Up @@ -137,6 +143,17 @@ Analyzer buildSynonymAnalyzer(
List<TokenFilterFactory> tokenFilters,
Function<String, TokenFilterFactory> allFilters
) {
if (synonymAnalyzer != null) {
Analyzer customSynonymAnalyzer;
try {
customSynonymAnalyzer = analysisRegistry.getAnalyzer(synonymAnalyzer);
} catch (IOException e) {
throw new RuntimeException(e);
}
if (customSynonymAnalyzer != null) {
return customSynonymAnalyzer;
}
}
return new CustomAnalyzer(
tokenizer,
charFilters.toArray(new CharFilterFactory[0]),
Expand Down Expand Up @@ -177,5 +194,4 @@ Reader getRulesFromSettings(Environment env) {
}
return rulesReader;
}

}

0 comments on commit 550756f

Please sign in to comment.