Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WLM Auto tagging] Add compressed trie structure to store Rules #16971

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Improve performance of the bitmap filtering([#16936](https://github.com/opensearch-project/OpenSearch/pull/16936/))
- Introduce Template query ([#16818](https://github.com/opensearch-project/OpenSearch/pull/16818))
- Propagate the sourceIncludes and excludes fields from fetchSourceContext to FieldsVisitor. ([#17080](https://github.com/opensearch-project/OpenSearch/pull/17080))
- [Automated Tagging] Add In-memory data structure to store and process Rules. ([#16971](https://github.com/opensearch-project/OpenSearch/pull/16971))

### Dependencies
- Bump `com.google.cloud:google-cloud-core-http` from 2.23.0 to 2.47.0 ([#16504](https://github.com/opensearch-project/OpenSearch/pull/16504))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.plugin.wlm.rule.structure;

import java.util.List;

/**
* Common interface which exposes methods to add/search/delete Rule attributes
*/
public interface FastPrefixMatchingStructure {

    /**
     * Stores {@code value} (the rule output) under {@code key} (the attribute value).
     *
     * @param key   the attribute value the rule output is stored against
     * @param value the rule output to associate with {@code key}
     */
    void insert(String key, String value);

    /**
     * Looks up {@code key} in the structure.
     *
     * @param key the key to look up
     * @return the string values associated with the key or with its prefixes;
     *         an empty list when nothing matches
     */
    List<String> search(String key);

    /**
     * Removes {@code key} from the structure.
     *
     * @param key the key to remove
     * @return {@code true} if the key was successfully deleted, {@code false} otherwise
     */
    boolean delete(String key);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.plugin.wlm.rule.structure;

import java.util.List;
import java.util.regex.Pattern;

/**
* Per attribute in memory storage structure for Rules
*/
public class RuleAttributeTrie implements FastPrefixMatchingStructure {
private static final String ALLOWED_ATTRIBUTE_VALUES = "^[a-zA-Z0-9-_]+\\*?$";
private static final int ATTRIBUTE_MAX_LENGTH = 100;
private TrieNode root;

/**
* Constructs an empty Trie.
*/
public RuleAttributeTrie() {
root = new TrieNode("");
}

/**
* Inserts a key-value pair into the trie.
*
* @param key The key to be inserted.
* @param value The value associated with the key.
*/
public void insert(String key, String value) {
if (!isValidValue(value)) {
throw new IllegalArgumentException(
"Invalid attribute value: " + value + " it should match the regex " + ALLOWED_ATTRIBUTE_VALUES
);
}
TrieInserter inserter = new TrieInserter(root, key, value);
root = inserter.insert();
}

private boolean isValidValue(String value) {
return value.length() <= ATTRIBUTE_MAX_LENGTH && value.matches(ALLOWED_ATTRIBUTE_VALUES);
}

/**
* Searches for a key in the trie.
*
* @param key The key to search for.
* @return A list of string values associated with the key or its prefixes.
* Returns an empty list if no matches are found.
*/
public List<String> search(String key) {
TrieSearcher searcher = new TrieSearcher(root, key);
return searcher.search();
}

/**
* Deletes a key from the trie.
*
* @param key The key to be deleted.
* @return true if the key was successfully deleted, false otherwise.
*/
public boolean delete(String key) {
TrieDeleter deleter = new TrieDeleter(root, key);
return deleter.delete();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.plugin.wlm.rule.structure;

/**
* Handles the deletion operation for the Trie.
*/
class TrieDeleter {
    private final TrieNode root;
    private final String key;

    /**
     * Constructs a TrieDeleter with the given root and key.
     *
     * @param root The root node of the trie.
     * @param key The key to be deleted.
     */
    public TrieDeleter(TrieNode root, String key) {
        this.root = root;
        this.key = key;
    }

    /**
     * Performs the deletion operation.
     * <p>
     * The node that terminates the key is unmarked and its value cleared. On the way back up,
     * every node on the path that is left without a value and without children is unlinked
     * from its parent, so deleting keys does not leave dead branches behind. The root itself
     * is never removed.
     *
     * @return true if the key was successfully deleted, false otherwise.
     */
    public boolean delete() {
        return deleteFrom(root, key);
    }

    /**
     * Deletes {@code remainingKey} from the subtree below {@code node}.
     * Recursion depth is bounded by the number of nodes on the key's path.
     *
     * @return true if a terminal node for the key was found and unmarked.
     */
    private static boolean deleteFrom(TrieNode node, String remainingKey) {
        if (remainingKey.isEmpty()) {
            if (!node.isEndOfWord()) {
                // the path exists, but no key ends here
                return false;
            }
            node.setEndOfWord(false);
            node.setValue(null);
            return true;
        }

        TrieNode child = node.findCommonPrefixChild(remainingKey);
        if (child == null) {
            return false;
        }

        boolean deleted = deleteFrom(child, remainingKey.substring(child.getKey().length()));
        if (deleted && !child.isEndOfWord() && child.getChildren().isEmpty()) {
            // the child no longer stores a key and leads nowhere: drop it
            node.getChildren().remove(child.getKey());
        }
        return deleted;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.plugin.wlm.rule.structure;

/**
* Handles the insertion operation for the Trie.
*/
class TrieInserter {
    private TrieNode root;
    private String key;
    private String value;

    /**
     * Creates an inserter for one key/value pair.
     *
     * @param root  root node of the trie to insert into
     * @param key   key to insert
     * @param value value to store at the node that terminates the key
     */
    public TrieInserter(TrieNode root, String key, String value) {
        this.root = root;
        this.key = key;
        this.value = value;
    }

    /**
     * Walks down the trie consuming the key and makes sure a node terminating it exists.
     * At each step one of three things happens:
     * <ol>
     * <li>a child whose key is a prefix of the remaining key is followed,</li>
     * <li>a child sharing only part of its key with the remaining key is split at the shared prefix,</li>
     * <li>no child overlaps at all, so the remaining key is attached as a new child.</li>
     * </ol>
     * The node reached once the key is exhausted receives the value and is marked as end of word.
     *
     * @return the root node of the trie after insertion
     */
    public TrieNode insert() {
        TrieNode node = root;
        String pending = key;

        while (pending.length() > 0) {
            final TrieNode prefixChild = node.findCommonPrefixChild(pending);
            if (prefixChild != null) {
                // a whole child key is a prefix of what is left: descend
                pending = pending.substring(prefixChild.getKey().length());
                node = prefixChild;
                continue;
            }

            final String overlappingKey = firstOverlappingChildKey(node, pending);
            if (overlappingKey == null) {
                // nothing in common with any child: hang the remainder off this node
                node = node.addNewChild(pending);
                pending = "";
                continue;
            }

            // partial overlap: split the child at the shared prefix and continue below it
            final int shared = sharedPrefixLength(overlappingKey, pending);
            node = node.splitNode(overlappingKey, shared);
            pending = pending.substring(shared);
        }

        node.setValue(value);
        node.setEndOfWord(true);
        return root;
    }

    /**
     * Returns the first child key (in the children map's iteration order) that shares at least
     * one leading character with {@code pending}, or {@code null} when there is none.
     */
    private String firstOverlappingChildKey(TrieNode node, String pending) {
        for (String candidate : node.getChildren().keySet()) {
            if (sharedPrefixLength(candidate, pending) > 0) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Counts how many leading characters the two strings have in common.
     */
    private int sharedPrefixLength(String left, String right) {
        final int limit = Math.min(left.length(), right.length());
        int index = 0;
        while (index < limit && left.charAt(index) == right.charAt(index)) {
            index++;
        }
        return index;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.plugin.wlm.rule.structure;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;

/**
* Represents a node in the Trie.
* Each node contains a key, an optional value, and references to child nodes.
*/
class TrieNode {
    /** Maximum number of values returned by {@link #findTopFiveClosest()}. */
    public static final int CLOSEST_LIMIT = 5;
    // Children indexed by their own key; splitNode keeps map key and node key in sync.
    private final Map<String, TrieNode> children;
    private String key;
    private String value;
    private boolean isEndOfWord;

    /**
     * Constructs a TrieNode with the given key, no value, no children and not marked as end of word.
     *
     * @param key The key associated with this node.
     */
    public TrieNode(String key) {
        this.children = new HashMap<>();
        this.key = key;
        this.value = null;
        this.isEndOfWord = false;
    }

    /**
     * @return the live, mutable map of child nodes indexed by child key.
     */
    public Map<String, TrieNode> getChildren() {
        return children;
    }

    public String getKey() {
        return key;
    }

    public void setKey(String key) {
        this.key = key;
    }

    public String getValue() {
        return value;
    }

    public void setValue(String value) {
        this.value = value;
    }

    public boolean isEndOfWord() {
        return isEndOfWord;
    }

    public void setEndOfWord(boolean endOfWord) {
        isEndOfWord = endOfWord;
    }

    /**
     * Creates a child for {@code key}, marks it as end of word and registers it under this node.
     * <p>
     * The child starts without a value: the caller is expected to set it. This node's own value
     * is deliberately not copied into the child, since it belongs to a different key.
     *
     * @param key The key of the new child.
     * @return the newly created child node.
     */
    public TrieNode addNewChild(String key) {
        TrieNode newNode = new TrieNode(key);
        newNode.setEndOfWord(true);
        children.put(key, newNode);
        return newNode;
    }

    /**
     * Splits the child registered under {@code childKey} at {@code commonPrefixLength}.
     * A new intermediate node keyed by the common prefix replaces the child under this node,
     * and the original child (re-keyed to the remaining suffix) becomes its only child.
     * <p>
     * Arguments are validated before anything is modified, so a rejected call leaves this node
     * untouched. The split point must fall strictly inside the child key; otherwise one of the
     * resulting nodes would get an empty key, which is a prefix of every string.
     *
     * @param childKey The key of the existing child to split.
     * @param commonPrefixLength Number of leading characters of {@code childKey} that move to the new node.
     * @return the new intermediate node keyed by the common prefix.
     * @throws IllegalArgumentException if there is no child under {@code childKey} or if
     *         {@code commonPrefixLength} is not in the range {@code (0, childKey.length())}.
     */
    public TrieNode splitNode(String childKey, int commonPrefixLength) {
        TrieNode childNode = children.get(childKey);
        if (childNode == null) {
            throw new IllegalArgumentException("No child with key [" + childKey + "] to split");
        }
        if (commonPrefixLength <= 0 || commonPrefixLength >= childKey.length()) {
            throw new IllegalArgumentException(
                "Split position [" + commonPrefixLength + "] must be inside the child key [" + childKey + "]"
            );
        }

        String commonPrefix = childKey.substring(0, commonPrefixLength);
        String suffix = childKey.substring(commonPrefixLength);
        TrieNode newNode = new TrieNode(commonPrefix);

        // remove the existing partially matching child node since we will split that
        children.remove(childKey);
        // re-attach common prefix as direct child
        children.put(commonPrefix, newNode);

        childNode.setKey(suffix);
        newNode.getChildren().put(suffix, childNode);
        return newNode;
    }

    /**
     * Finds a child whose whole key is a prefix of {@code key}.
     *
     * @param key The (remaining) key to match against the children.
     * @return the first such child in the map's iteration order, or {@code null} if there is none.
     */
    public TrieNode findCommonPrefixChild(String key) {
        return children.entrySet()
            .stream()
            .filter(entry -> key.startsWith(entry.getKey()))
            .findFirst()
            .map(Map.Entry::getValue)
            .orElse(null);
    }

    /**
     * Collects up to {@link #CLOSEST_LIMIT} values by a breadth-first walk of the subtree rooted
     * at this node (this node included), taking the value of every node marked as end of word.
     *
     * @return the collected values, in visiting order; empty if the subtree holds none.
     */
    public List<String> findTopFiveClosest() {
        List<String> ans = new ArrayList<>(CLOSEST_LIMIT);
        Queue<TrieNode> queue = new LinkedList<>();
        queue.offer(this);

        while (!queue.isEmpty() && ans.size() < CLOSEST_LIMIT) {
            TrieNode current = queue.poll();
            if (current.isEndOfWord()) {
                ans.add(current.getValue());
            }
            queue.addAll(current.getChildren().values());
        }

        return ans;
    }
}
Loading
Loading