opensearch-project · kaushalmahi12 · Dec 18, 2024 · Jan 7, 2025 · Jan 7, 2025 · Jan 7, 2025
@@ -37,6 +37,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Improve performance of the bitmap filtering([#16936](https://github.com/opensearch-project/OpenSearch/pull/16936/))
 - Introduce Template query ([#16818](https://github.com/opensearch-project/OpenSearch/pull/16818))
 - Propagate the sourceIncludes and excludes fields from fetchSourceContext to FieldsVisitor. ([#17080](https://github.com/opensearch-project/OpenSearch/pull/17080))
+- [Automated Tagging] Add In-memory data structure to store and process Rules. ([#16971](https://github.com/opensearch-project/OpenSearch/pull/16971))
 
 ### Dependencies
 - Bump `com.google.cloud:google-cloud-core-http` from 2.23.0 to 2.47.0 ([#16504](https://github.com/opensearch-project/OpenSearch/pull/16504))

@@ -0,0 +1,40 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.plugin.wlm.rule.structure;
+
+import java.util.List;
+
+/**
+ * Common interface which exposes methods to add/search/delete Rule attributes.
+ * Keys are attribute values and the stored values are the corresponding rule outputs.
+ */
+public interface FastPrefixMatchingStructure {
+    /**
+     * Inserts the rule output against the attribute value denoted by {@code key}.
+     *
+     * @param key   The attribute value under which the rule output is stored.
+     * @param value The rule output to associate with {@code key}.
+     */
+    void insert(String key, String value);
+
+    /**
+     * Searches for a key in structure.
+     *
+     * @param key The key to search for.
+     * @return A list of string values associated with the key or its prefixes.
+     *         Returns an empty list if no matches are found.
+     */
+    List<String> search(String key);
+
+    /**
+     * Deletes a key from the structure.
+     *
+     * @param key The key to be deleted.
+     * @return true if the key was successfully deleted, false otherwise
+     *         (for example when the key is not present).
+     */
+    boolean delete(String key);
+}
@@ -0,0 +1,70 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.plugin.wlm.rule.structure;
+
+import java.util.List;
+import java.util.regex.Pattern;
+
+/**
+ * Per attribute in memory storage structure for Rules.
+ * <p>
+ * Holds the root {@link TrieNode} and delegates each operation to a dedicated helper:
+ * {@link TrieInserter}, {@link TrieSearcher} or {@link TrieDeleter}.
+ */
+public class RuleAttributeTrie implements FastPrefixMatchingStructure {
+    private static final String ALLOWED_ATTRIBUTE_VALUES = "^[a-zA-Z0-9-_]+\\*?$";
+    // Compiled once up front; String#matches would recompile the expression on every insert.
+    private static final Pattern ALLOWED_ATTRIBUTE_VALUES_PATTERN = Pattern.compile(ALLOWED_ATTRIBUTE_VALUES);
+    private static final int ATTRIBUTE_MAX_LENGTH = 100;
+    private TrieNode root;
+
+    /**
+     * Constructs an empty Trie whose root node has an empty key.
+     */
+    public RuleAttributeTrie() {
+        root = new TrieNode("");
+    }
+
+    /**
+     * Inserts a key-value pair into the trie.
+     *
+     * @param key   The key to be inserted.
+     * @param value The value associated with the key. It must be at most 100 characters long and consist of
+     *              letters, digits, '-' or '_', optionally followed by a single trailing '*'.
+     * @throws IllegalArgumentException if {@code value} is null, too long, or does not match the allowed pattern.
+     */
+    @Override
+    public void insert(String key, String value) {
+        // NOTE(review): the check is applied to `value`, while the interface describes the *key* as the
+        // attribute value and the message below talks about attribute values — confirm which argument
+        // is meant to be validated. Behaviour is intentionally left as-is here.
+        if (!isValidValue(value)) {
+            throw new IllegalArgumentException(
+                "Invalid attribute value: " + value + " it should match the regex " + ALLOWED_ATTRIBUTE_VALUES
+            );
+        }
+        TrieInserter inserter = new TrieInserter(root, key, value);
+        root = inserter.insert();
+    }
+
+    /**
+     * Checks the length limit and the allowed character set; a null value is treated as invalid
+     * so that callers get an IllegalArgumentException rather than a NullPointerException.
+     */
+    private boolean isValidValue(String value) {
+        return value != null
+            && value.length() <= ATTRIBUTE_MAX_LENGTH
+            && ALLOWED_ATTRIBUTE_VALUES_PATTERN.matcher(value).matches();
+    }
+
+    /**
+     * Searches for a key in the trie by delegating to {@link TrieSearcher}.
+     *
+     * @param key The key to search for.
+     * @return A list of string values associated with the key or its prefixes.
+     *         Returns an empty list if no matches are found.
+     */
+    @Override
+    public List<String> search(String key) {
+        TrieSearcher searcher = new TrieSearcher(root, key);
+        return searcher.search();
+    }
+
+    /**
+     * Deletes a key from the trie by delegating to {@link TrieDeleter}.
+     *
+     * @param key The key to be deleted.
+     * @return true if the key was successfully deleted, false otherwise.
+     */
+    @Override
+    public boolean delete(String key) {
+        TrieDeleter deleter = new TrieDeleter(root, key);
+        return deleter.delete();
+    }
+}
@@ -0,0 +1,66 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.plugin.wlm.rule.structure;
+
+/**
+ * Handles the deletion operation for the Trie.
+ * <p>
+ * Besides un-marking the node that holds the key, the deleter removes every node on the
+ * path that is left without a value and without children, so deleted keys do not leave
+ * unreachable-by-any-key nodes behind. The root is never removed.
+ */
+class TrieDeleter {
+    private final TrieNode root;
+    private final String key;
+
+    /**
+     * Constructs a TrieDeleter with the given root and key.
+     *
+     * @param root The root node of the trie.
+     * @param key  The key to be deleted.
+     */
+    public TrieDeleter(TrieNode root, String key) {
+        this.root = root;
+        this.key = key;
+    }
+
+    /**
+     * Performs the deletion operation.
+     *
+     * @return true if the key was present as an end-of-word node and has been deleted,
+     *         false if the key could not be followed to a node or that node does not end a word.
+     */
+    public boolean delete() {
+        // Every hop strips the matched child's key from the remaining key. Child keys are
+        // assumed to be non-empty (an empty child key would never let the walk terminate),
+        // so the path holds at most key.length() + 1 nodes, root included.
+        final TrieNode[] path = new TrieNode[key.length() + 1];
+        int depth = 0;
+        path[depth] = root;
+
+        String remainingKey = key;
+        while (!remainingKey.isEmpty()) {
+            TrieNode childNode = path[depth].findCommonPrefixChild(remainingKey);
+            if (childNode == null) {
+                return false;
+            }
+            path[++depth] = childNode;
+            remainingKey = remainingKey.substring(childNode.getKey().length());
+        }
+
+        final TrieNode target = path[depth];
+        if (!target.isEndOfWord()) {
+            return false;
+        }
+
+        target.setEndOfWord(false);
+        target.setValue(null);
+        pruneDeadNodes(path, depth);
+        return true;
+    }
+
+    /**
+     * Walks from the deleted node back towards the root and detaches each node that neither
+     * ends a word nor has children. Stops at the first node that is still in use; index 0
+     * (the root) is never detached.
+     */
+    private static void pruneDeadNodes(TrieNode[] path, int depth) {
+        for (int i = depth; i > 0; i--) {
+            TrieNode node = path[i];
+            if (node.isEndOfWord() || !node.getChildren().isEmpty()) {
+                return;
+            }
+            path[i - 1].getChildren().remove(node.getKey());
+        }
+    }
+}
@@ -0,0 +1,91 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.plugin.wlm.rule.structure;
+
+/**
+ * Performs a single key/value insertion into the Trie.
+ */
+class TrieInserter {
+    private TrieNode root;
+    private String key;
+    private String value;
+
+    /**
+     * Creates an inserter bound to one trie and one key/value pair.
+     *
+     * @param root  The root node of the trie.
+     * @param key   The key to be inserted.
+     * @param value The value associated with the key.
+     */
+    public TrieInserter(TrieNode root, String key, String value) {
+        this.root = root;
+        this.key = key;
+        this.value = value;
+    }
+
+    /**
+     * Walks the trie along the key and stores the value on the node where the key ends.
+     * Three situations are handled while walking:
+     * <ol>
+     * <li>a child key is a full prefix of what is left of the key: descend into that child</li>
+     * <li>a child key only shares a leading part with what is left of the key: split that child at the shared part</li>
+     * <li>no child shares anything with what is left of the key: attach the remainder as a new child</li>
+     * </ol>
+     * The node reached at the end is marked as end of word and receives the value.
+     *
+     * @return The root node of the trie after insertion.
+     */
+    public TrieNode insert() {
+        TrieNode node = root;
+        String pending = key;
+
+        while (!pending.isEmpty()) {
+            final TrieNode fullMatch = node.findCommonPrefixChild(pending);
+            if (fullMatch != null) {
+                // the whole child key is consumed, keep walking below it
+                node = fullMatch;
+                pending = pending.substring(fullMatch.getKey().length());
+                continue;
+            }
+
+            // look for the first child that shares at least one leading character
+            String overlappingKey = null;
+            int overlap = 0;
+            for (String candidate : node.getChildren().keySet()) {
+                final int shared = sharedPrefixLength(candidate, pending);
+                if (shared > 0) {
+                    overlappingKey = candidate;
+                    overlap = shared;
+                    break;
+                }
+            }
+
+            if (overlappingKey == null) {
+                // nothing in common with any child: the remainder becomes a brand-new child
+                node = node.addNewChild(pending);
+                pending = "";
+            } else {
+                // partial overlap: split the child and continue from the shared-prefix node
+                node = node.splitNode(overlappingKey, overlap);
+                pending = pending.substring(overlap);
+            }
+        }
+
+        node.setValue(value);
+        node.setEndOfWord(true);
+        return root;
+    }
+
+    /**
+     * Counts how many leading characters the two strings have in common.
+     */
+    private static int sharedPrefixLength(String first, String second) {
+        final int limit = Math.min(first.length(), second.length());
+        int index = 0;
+        while (index < limit && first.charAt(index) == second.charAt(index)) {
+            index++;
+        }
+        return index;
+    }
+}
@@ -0,0 +1,118 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.plugin.wlm.rule.structure;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Queue;
+
+/**
+ * Represents a node in the Trie.
+ * Each node contains a key, an optional value, and references to child nodes.
+ * <p>
+ * Children are stored in a map indexed by the child's own key, so a child's key and the
+ * map entry it lives under must be kept in sync (see {@link #splitNode(String, int)}).
+ */
+class TrieNode {
+    /** Maximum number of values returned by {@link #findTopFiveClosest()}. */
+    public static final int CLOSEST_LIMIT = 5;
+    private final Map<String, TrieNode> children;
+    private String key;
+    private String value;
+    private boolean isEndOfWord;
+
+    /**
+     * Constructs a TrieNode with the given key, no value, no children and not marked as end of word.
+     *
+     * @param key The key associated with this node.
+     */
+    public TrieNode(String key) {
+        this.children = new HashMap<>();
+        this.key = key;
+        this.value = null;
+        this.isEndOfWord = false;
+    }
+
+    /**
+     * @return the live, mutable map of child nodes indexed by child key.
+     */
+    public Map<String, TrieNode> getChildren() {
+        return children;
+    }
+
+    public String getKey() {
+        return key;
+    }
+
+    /**
+     * Changes this node's key. The parent's child map is not updated by this call.
+     *
+     * @param key the new key
+     */
+    public void setKey(String key) {
+        this.key = key;
+    }
+
+    public String getValue() {
+        return value;
+    }
+
+    public void setValue(String value) {
+        this.value = value;
+    }
+
+    public boolean isEndOfWord() {
+        return isEndOfWord;
+    }
+
+    public void setEndOfWord(boolean endOfWord) {
+        isEndOfWord = endOfWord;
+    }
+
+    /**
+     * Creates a child for the given key and registers it under this node.
+     * The child is marked as end of word but carries no value yet; the caller is expected
+     * to set it. (It deliberately does not inherit this node's own value.)
+     *
+     * @param key the key of the new child
+     * @return the newly created child node
+     */
+    public TrieNode addNewChild(String key) {
+        TrieNode newNode = new TrieNode(key);
+        newNode.setEndOfWord(true);
+        getChildren().put(key, newNode);
+        return newNode;
+    }
+
+    /**
+     * Splits the child registered under {@code childKey} at {@code commonPrefixLength}:
+     * a new intermediate node keyed by the first {@code commonPrefixLength} characters takes
+     * the child's place, and the existing child is re-keyed to the remaining suffix and
+     * attached below the intermediate node.
+     *
+     * @param childKey           key of an existing direct child
+     * @param commonPrefixLength number of leading characters to keep in the intermediate node;
+     *                           must be greater than 0 and smaller than {@code childKey.length()}
+     * @return the new intermediate node (not marked as end of word, no value)
+     * @throws IllegalArgumentException if there is no such child or the split point would
+     *                                  produce an empty key; the node is left untouched in that case
+     */
+    public TrieNode splitNode(String childKey, int commonPrefixLength) {
+        // validate before touching the map so a bad call cannot leave the node half-modified
+        TrieNode childNode = getChildren().get(childKey);
+        if (childNode == null) {
+            throw new IllegalArgumentException("No child with key: " + childKey);
+        }
+        if (commonPrefixLength <= 0 || commonPrefixLength >= childKey.length()) {
+            throw new IllegalArgumentException(
+                "commonPrefixLength must be greater than 0 and less than " + childKey.length() + " but was " + commonPrefixLength
+            );
+        }
+
+        String commonPrefix = childKey.substring(0, commonPrefixLength);
+        String suffix = childKey.substring(commonPrefixLength);
+        TrieNode newNode = new TrieNode(commonPrefix);
+
+        // remove the existing partially matching child node since we will split that
+        getChildren().remove(childKey);
+        // re-attach common prefix as direct child
+        getChildren().put(commonPrefix, newNode);
+
+        childNode.setKey(suffix);
+        newNode.getChildren().put(suffix, childNode);
+        return newNode;
+    }
+
+    /**
+     * Finds a direct child whose key is a prefix of the given key.
+     *
+     * @param key the (remaining) key to match
+     * @return the first such child in map iteration order, or null if there is none
+     */
+    public TrieNode findCommonPrefixChild(String key) {
+        for (Map.Entry<String, TrieNode> entry : getChildren().entrySet()) {
+            if (key.startsWith(entry.getKey())) {
+                return entry.getValue();
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Collects values breadth-first starting at this node (inclusive): the value of every
+     * end-of-word node encountered is added until {@link #CLOSEST_LIMIT} values are gathered
+     * or the subtree is exhausted. Siblings are visited in the child map's iteration order.
+     *
+     * @return at most {@link #CLOSEST_LIMIT} values found in this subtree
+     */
+    public List<String> findTopFiveClosest() {
+        List<String> ans = new ArrayList<>(CLOSEST_LIMIT);
+        Queue<TrieNode> queue = new LinkedList<>();
+        queue.offer(this);
+
+        while (!queue.isEmpty() && ans.size() < CLOSEST_LIMIT) {
+            TrieNode current = queue.poll();
+            if (current.isEndOfWord()) {
+                ans.add(current.getValue());
+            }
+            queue.addAll(current.getChildren().values());
+        }
+
+        return ans;
+    }
+}