Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Metropolis Hastings with All or Nothing for Likelihood #54

Open
wants to merge 5 commits into
base: metro_hastings
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions src/examples/indefinites/grammar.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
from ultk.language.grammar import Grammar, Rule
import pandas as pd
from ultk.language.semantics import Universe

# indefinites_grammar = Grammar.from_yaml("indefinites/grammar.yml")
indefinites_grammar = Grammar.from_module("indefinites.grammar_functions")

# Build the universe of referents, weighting each referent by an
# empirically derived prior probability.
referents = pd.read_csv("indefinites/referents.csv")
prior = pd.read_csv("indefinites/data/Beekhuizen_priors.csv")
# The two CSVs must list the same referents in the same order,
# since probabilities are copied over positionally below.
assert (referents["name"] == prior["name"]).all()
referents["probability"] = prior["probability"]
universe = Universe.from_dataframe(referents)

# NOTE(review): indefinites_grammar is assigned twice in this file (once
# above from the module, once here from YAML; the second assignment wins).
# This looks like leftover diff residue — confirm which definition should
# survive. Per review, this demo should live in its own file so the
# module-level indefinites_grammar used by other scripts is untouched.
indefinites_grammar = Grammar.from_yaml("indefinites/grammar.yml")
#indefinites_grammar = Grammar.from_module("indefinites.grammar_functions")
# Demo: parse an expression, then draw one Metropolis-Hastings sample
# conditioned on a single (referent, truth-value) observation.
print(indefinites_grammar.parse("and(not(K+), or(N-, not(SE-)))").hm_sample(indefinites_grammar, [(universe.referents[2], True)]))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's important for the indefinites example to not modify this file directly. The value indefinites_grammar is used in other scripts in the example.

Can you make an example demo-ing the MH sampling elsewhere (either its own file here, or a new sub-folder)?


"""
# this defines the grammar "manually" instead of using the YAML text format
Expand Down
88 changes: 88 additions & 0 deletions src/ultk/language/grammar.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import inspect
import random
import re
import copy
from collections import defaultdict
from collections.abc import Sequence
from dataclasses import dataclass
Expand All @@ -20,6 +21,15 @@

T = TypeVar("T")

def all_or_nothing(data, tree):
    """All-or-nothing likelihood of `tree` given `data`.

    Args:
        data: iterable of (input, output) observation pairs.
        tree: a callable (e.g. a GrammaticalExpression) applied to each input.

    Returns:
        int: 1 if ``tree(input) == output`` for every pair (including the
        vacuous case of empty data), 0 as soon as any pair disagrees.
    """
    for observed_input, observed_output in data:
        if tree(observed_input) != observed_output:
            return 0
    # Every observation matched (multiplying 1s, as the original did, is a no-op).
    return 1


@dataclass(frozen=True)
class Rule:
Expand Down Expand Up @@ -162,6 +172,9 @@ def add_child(self, child) -> None:
self.children = tuple([child])
else:
self.children = self.children + (child,)

def replace_children(self, children) -> None:
    """Replace this node's children wholesale.

    Args:
        children: the new children; callers pass a tuple. No copy or
            validation is performed — the argument is stored as-is.
    """
    self.children = children

def to_dict(self) -> dict:
the_dict = super().to_dict()
Expand All @@ -178,6 +191,78 @@ def count_atoms(self):
return 1
return sum(child.count_atoms() for child in self.children)




# data: (input, output)
def hm_sample(self, grammar: "Grammar", data, likelihood_func=all_or_nothing) -> "GrammaticalExpression":
    """Draw one Metropolis-Hastings sample from the posterior over expressions.

    Proposes a new tree by choosing a node uniformly at random and
    regenerating the subtree rooted there from `grammar`, then accepts the
    proposal with the standard MH probability: the (prior * likelihood)
    ratio, corrected for the subtree-regeneration proposal distribution.
    Loops until a proposal is accepted. `self` is never mutated (proposals
    are built on a deep copy).

    Args:
        grammar: grammar used to score (via `prior`) and regenerate subtrees.
        data: iterable of (input, output) pairs passed to `likelihood_func`.
        likelihood_func: likelihood of a tree given data; defaults to
            `all_or_nothing`.

    Returns:
        GrammaticalExpression: the first accepted proposal tree.
    """
    old_tree_prior = self.prior(grammar)
    old_node_count = self.node_count()
    # Score the current state ONCE, up front. The original code computed
    # likelihood_func(data, old_tree) after the proposal had already been
    # spliced into old_tree (new_tree is old_tree for non-root proposals),
    # so the "old" likelihood was actually the new tree's likelihood.
    old_likelihood = likelihood_func(data, self)
    while True:
        old_tree = copy.deepcopy(self)
        # Linearize the copied tree and record each node's parent index so a
        # uniformly random node can be replaced in place.
        linearized_self = []
        parents = []
        stack = [(old_tree, -1)]
        while stack:
            current_node, parent_index = stack.pop()
            linearized_self.append(current_node)
            parents.append(parent_index)
            current_index = len(linearized_self) - 1
            for child in current_node.children or ():
                stack.append((child, current_index))
        changing_index = random.randrange(len(linearized_self))
        current_node = linearized_self[changing_index]
        old_subtree_prior = current_node.prior(grammar)
        if parents[changing_index] != -1:
            # Interior/leaf node: regenerate it and splice the replacement
            # into its parent's children, preserving sibling order.
            parent_node = linearized_self[parents[changing_index]]
            new_node = grammar.generate(grammar._rules_by_name[current_node.rule_name].lhs)
            new_children = tuple(
                new_node if child is current_node else child
                for child in (parent_node.children or ())
            )
            parent_node.replace_children(new_children)
            new_tree = old_tree
        else:
            # The root was chosen: regenerate the entire tree.
            new_node = grammar.generate(grammar._rules_by_name[old_tree.rule_name].lhs)
            new_tree = new_node
        new_tree_prior = new_tree.prior(grammar)
        new_node_count = new_tree.node_count()
        new_subtree_prior = new_node.prior(grammar)
        numerator = new_tree_prior * likelihood_func(data, new_tree)
        denominator = old_tree_prior * old_likelihood
        try:
            # Hastings correction: a node is picked uniformly (1/node_count)
            # and its replacement subtree is drawn from the prior.
            proposal_ratio = (old_subtree_prior / new_node_count) / (new_subtree_prior / old_node_count)
            if denominator == 0:
                # Current state has zero posterior mass: accept any proposal
                # with positive mass. (The original rejected forever here,
                # which made the loop non-terminating.)
                mh_accept = 1.0 if numerator > 0 else 0.0
            else:
                mh_accept = min(1, (numerator / denominator) * proposal_ratio)
        except ZeroDivisionError:
            # Degenerate proposal (e.g. zero-probability subtree): reject.
            mh_accept = 0.0
        if random.random() < mh_accept:
            return new_tree

def prior(self, grammar: "Grammar") -> float:
    """Prior probability of this expression under `grammar`.

    Computed as the product, over every node in the tree, of the grammar's
    probability for that node's rule.

    NOTE(review): conceptually this may belong on Grammar (the grammar
    assigns probabilities to expressions) rather than on the expression.
    """
    result = grammar.probability(grammar._rules_by_name[self.rule_name])
    for child in self.children or ():
        result *= child.prior(grammar)
    return result

def node_count(self) -> int:
    """Number of nodes in this (sub)tree, counting this node itself."""
    total = 0
    to_visit = [self]
    # Iterative traversal; counts each node as it is popped.
    while to_visit:
        node = to_visit.pop()
        total += 1
        to_visit.extend(node.children or ())
    return total


@classmethod
def from_dict(cls, the_dict: dict, grammar: "Grammar") -> "GrammaticalExpression":
children = the_dict.get("children")
Expand Down Expand Up @@ -258,6 +343,9 @@ def add_rule(self, rule: Rule):
)
self._rules_by_name[rule.name] = rule

def probability(self, rule: Rule) -> float:
    """Probability of `rule`: its weight, normalized by the total weight of
    all rules sharing its left-hand side.

    NOTE(review): the normalizer is recomputed on every call; per review,
    consider functools caching if this becomes a hot path.
    """
    total_weight = sum(r.weight for r in self._rules[rule.lhs])
    return float(rule.weight) / total_weight

def parse(
self,
expression: str,
Expand Down
1 change: 1 addition & 0 deletions src/ultk/language/semantics.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ def __bool__(self):
return bool(self.mapping) # and bool(self.universe)

def __str__(self):
    """Render the mapping as ``Mapping:`` followed by a tab-indented block of
    ``referent: value`` lines (one per entry, joined by newlines).

    The diff had left the previous str.format implementation as unreachable
    code after the new return; it is removed here. Output is unchanged
    (``chr(10)`` is just ``"\\n"``).
    """
    body = "\n".join(f"{ref}: {self.mapping[ref]}" for ref in self.mapping)
    return f"Mapping:\n\t{body}"
Loading