Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Metropolis Hastings with All or Nothing for Likelihood #54

Open
wants to merge 5 commits into
base: metro_hastings
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions src/examples/indefinites/grammar.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
from ultk.language.grammar import Grammar, Rule
import pandas as pd
from ultk.language.semantics import Universe

# indefinites_grammar = Grammar.from_yaml("indefinites/grammar.yml")
indefinites_grammar = Grammar.from_module("indefinites.grammar_functions")

# Build the universe of referents, weighting each referent by an
# empirically derived prior probability.
referents = pd.read_csv("indefinites/referents.csv")
prior = pd.read_csv("indefinites/data/Beekhuizen_priors.csv")
# The two CSVs must list the same referents in the same order,
# since probabilities are copied over positionally below.
assert (referents["name"] == prior["name"]).all()
referents["probability"] = prior["probability"]
universe = Universe.from_dataframe(referents)

# NOTE(review): indefinites_grammar is assigned twice in this file (once
# above from the module, once here from YAML; the second assignment wins).
# This looks like leftover diff residue — confirm which definition should
# survive. Per review, this demo should live in its own file so the
# module-level indefinites_grammar used by other scripts is untouched.
indefinites_grammar = Grammar.from_yaml("indefinites/grammar.yml")
#indefinites_grammar = Grammar.from_module("indefinites.grammar_functions")
# Demo: parse an expression, then draw one Metropolis-Hastings sample
# conditioned on a single (referent, truth-value) observation.
print(indefinites_grammar.parse("and(not(K+), or(N-, not(SE-)))").hm_sample(indefinites_grammar, [(universe.referents[2], True)]))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's important for the indefinites example to not modify this file directly. The value indefinites_grammar is used in other scripts in the example.

Can you make an example demo-ing the MH sampling elsewhere (either its own file here, or a new sub-folder)?


"""
# this defines the grammar "manually" instead of using the YAML text format
Expand Down
88 changes: 88 additions & 0 deletions src/ultk/language/grammar.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import inspect
import random
import re
import copy
from collections import defaultdict
from collections.abc import Sequence
from dataclasses import dataclass
Expand All @@ -20,6 +21,15 @@

T = TypeVar("T")

def all_or_nothing(data, tree):
    """All-or-nothing likelihood of `tree` given `data`.

    Args:
        data: iterable of (input, output) observation pairs.
        tree: a callable (e.g. a GrammaticalExpression) applied to each input.

    Returns:
        int: 1 if ``tree(input) == output`` for every pair (including the
        vacuous case of empty data), 0 as soon as any pair disagrees.
    """
    for observed_input, observed_output in data:
        if tree(observed_input) != observed_output:
            return 0
    # Every observation matched (multiplying 1s, as the original did, is a no-op).
    return 1


@dataclass(frozen=True)
class Rule:
Expand Down Expand Up @@ -162,6 +172,9 @@ def add_child(self, child) -> None:
self.children = tuple([child])
else:
self.children = self.children + (child,)

def replace_children(self, children) -> None:
    """Replace this node's children wholesale.

    Args:
        children: the new children; callers pass a tuple. No copy or
            validation is performed — the argument is stored as-is.
    """
    self.children = children

def to_dict(self) -> dict:
the_dict = super().to_dict()
Expand All @@ -178,6 +191,78 @@ def count_atoms(self):
return 1
return sum(child.count_atoms() for child in self.children)




# data: (input, output)
def hm_sample(self, grammar: "Grammar", data, likelihood_func=all_or_nothing) -> "GrammaticalExpression":
    """Draw one Metropolis-Hastings sample from the posterior over expressions.

    Proposes a new tree by choosing a node uniformly at random and
    regenerating the subtree rooted there from `grammar`, then accepts the
    proposal with the standard MH probability: the (prior * likelihood)
    ratio, corrected for the subtree-regeneration proposal distribution.
    Loops until a proposal is accepted. `self` is never mutated (proposals
    are built on a deep copy).

    Args:
        grammar: grammar used to score (via `prior`) and regenerate subtrees.
        data: iterable of (input, output) pairs passed to `likelihood_func`.
        likelihood_func: likelihood of a tree given data; defaults to
            `all_or_nothing`.

    Returns:
        GrammaticalExpression: the first accepted proposal tree.
    """
    old_tree_prior = self.prior(grammar)
    old_node_count = self.node_count()
    # Score the current state ONCE, up front. The original code computed
    # likelihood_func(data, old_tree) after the proposal had already been
    # spliced into old_tree (new_tree is old_tree for non-root proposals),
    # so the "old" likelihood was actually the new tree's likelihood.
    old_likelihood = likelihood_func(data, self)
    while True:
        old_tree = copy.deepcopy(self)
        # Linearize the copied tree and record each node's parent index so a
        # uniformly random node can be replaced in place.
        linearized_self = []
        parents = []
        stack = [(old_tree, -1)]
        while stack:
            current_node, parent_index = stack.pop()
            linearized_self.append(current_node)
            parents.append(parent_index)
            current_index = len(linearized_self) - 1
            for child in current_node.children or ():
                stack.append((child, current_index))
        changing_index = random.randrange(len(linearized_self))
        current_node = linearized_self[changing_index]
        old_subtree_prior = current_node.prior(grammar)
        if parents[changing_index] != -1:
            # Interior/leaf node: regenerate it and splice the replacement
            # into its parent's children, preserving sibling order.
            parent_node = linearized_self[parents[changing_index]]
            new_node = grammar.generate(grammar._rules_by_name[current_node.rule_name].lhs)
            new_children = tuple(
                new_node if child is current_node else child
                for child in (parent_node.children or ())
            )
            parent_node.replace_children(new_children)
            new_tree = old_tree
        else:
            # The root was chosen: regenerate the entire tree.
            new_node = grammar.generate(grammar._rules_by_name[old_tree.rule_name].lhs)
            new_tree = new_node
        new_tree_prior = new_tree.prior(grammar)
        new_node_count = new_tree.node_count()
        new_subtree_prior = new_node.prior(grammar)
        numerator = new_tree_prior * likelihood_func(data, new_tree)
        denominator = old_tree_prior * old_likelihood
        try:
            # Hastings correction: a node is picked uniformly (1/node_count)
            # and its replacement subtree is drawn from the prior.
            proposal_ratio = (old_subtree_prior / new_node_count) / (new_subtree_prior / old_node_count)
            if denominator == 0:
                # Current state has zero posterior mass: accept any proposal
                # with positive mass. (The original rejected forever here,
                # which made the loop non-terminating.)
                mh_accept = 1.0 if numerator > 0 else 0.0
            else:
                mh_accept = min(1, (numerator / denominator) * proposal_ratio)
        except ZeroDivisionError:
            # Degenerate proposal (e.g. zero-probability subtree): reject.
            mh_accept = 0.0
        if random.random() < mh_accept:
            return new_tree

def prior(self, grammar: "Grammar") -> float:
    """Prior probability of this expression under `grammar`.

    Computed as the product, over every node in the tree, of the grammar's
    probability for that node's rule.

    NOTE(review): conceptually this may belong on Grammar (the grammar
    assigns probabilities to expressions) rather than on the expression.
    """
    result = grammar.probability(grammar._rules_by_name[self.rule_name])
    for child in self.children or ():
        result *= child.prior(grammar)
    return result

def node_count(self) -> int:
    """Number of nodes in this (sub)tree, counting this node itself."""
    total = 0
    to_visit = [self]
    # Iterative traversal; counts each node as it is popped.
    while to_visit:
        node = to_visit.pop()
        total += 1
        to_visit.extend(node.children or ())
    return total


@classmethod
def from_dict(cls, the_dict: dict, grammar: "Grammar") -> "GrammaticalExpression":
children = the_dict.get("children")
Expand Down Expand Up @@ -258,6 +343,9 @@ def add_rule(self, rule: Rule):
)
self._rules_by_name[rule.name] = rule

def probability(self, rule: Rule) -> float:
    """Probability of `rule`: its weight, normalized by the total weight of
    all rules sharing its left-hand side.

    NOTE(review): the normalizer is recomputed on every call; per review,
    consider functools caching if this becomes a hot path.
    """
    total_weight = sum(r.weight for r in self._rules[rule.lhs])
    return float(rule.weight) / total_weight

def parse(
self,
expression: str,
Expand Down
1 change: 1 addition & 0 deletions src/ultk/language/semantics.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ def __bool__(self):
return bool(self.mapping) # and bool(self.universe)

def __str__(self):
    """Render the mapping as ``Mapping:`` followed by a tab-indented block of
    ``referent: value`` lines (one per entry, joined by newlines).

    The diff had left the previous str.format implementation as unreachable
    code after the new return; it is removed here. Output is unchanged
    (``chr(10)`` is just ``"\\n"``).
    """
    body = "\n".join(f"{ref}: {self.mapping[ref]}" for ref in self.mapping)
    return f"Mapping:\n\t{body}"
Loading