mypy-induced cleaning
shanest committed Feb 23, 2024
1 parent 2955a1b commit ee9014f
Showing 4 changed files with 37 additions and 31 deletions.
2 changes: 1 addition & 1 deletion src/ultk/language/grammar.py
@@ -4,7 +4,7 @@
 from collections.abc import Sequence
 from itertools import product
 from typing import Any, Callable, Generator, TypedDict
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from yaml import load

 try:
1 change: 0 additions & 1 deletion src/ultk/language/language.py
@@ -21,7 +21,6 @@

 @dataclass(eq=True, unsafe_hash=True)
 class Expression:
-
     """Minimally contains a form and a meaning."""

     # gneric/dummy form and meaning if not specified
36 changes: 20 additions & 16 deletions src/ultk/language/sampling.py
@@ -8,7 +8,7 @@
 from tqdm import tqdm


-def powerset(iterable: Iterable, max_size: int = None) -> Iterable:
+def powerset(iterable: Iterable, max_size: int | None = None) -> Iterable:
     """Enumerate all _non-empty_ subsets of an iterable up to a given maximum size, e.g.:
     powerset([1,2,3]) --> (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
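For context on the repeated `int | None` changes in this file: a parameter annotated `int` with a default of `None` is rejected under mypy's no-implicit-optional behavior (the default since mypy 0.990), so the annotation must spell out the `None` case and the body must narrow it before use. A minimal sketch of the pattern, with a plausible body based on the docstring above rather than the repository's actual implementation:

from collections.abc import Iterable
from itertools import chain, combinations

def powerset(iterable: Iterable, max_size: int | None = None) -> Iterable:
    """Non-empty subsets up to max_size (illustrative sketch, not the ULTK body)."""
    s = list(iterable)
    if max_size is None:
        max_size = len(s)  # narrow int | None to int before arithmetic
    return chain.from_iterable(combinations(s, r) for r in range(1, max_size + 1))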
@@ -45,7 +45,7 @@ def all_expressions(meanings: Iterable[Meaning]) -> Generator[Expression, None,
 def all_languages(
     expressions: Iterable[Expression],
     language_class: Type[Language] = Language,
-    max_size: int = None,
+    max_size: int | None = None,
 ) -> Generator[Language, None, None]:
     """Generate all Languages (sets of Expressions) from a given set of Expressions.
@@ -71,9 +71,9 @@ def upto_comb(num: int, max_k: int) -> int:
 def random_languages(
     expressions: Iterable[Expression],
     sampling_strategy: str = "uniform",
-    sample_size: int = None,
+    sample_size: int | None = None,
     language_class: Type[Language] = Language,
-    max_size: int = None,
+    max_size: int | None = None,
 ) -> list[Language]:
     """Generate unique Languages by randomly sampling subsets of Expressions, either in a uniform or stratified way.
     If there are fewer than `sample_size` possible Languages up to size `max_size`,
@@ -122,7 +122,7 @@ def random_languages(
         return list(
             all_languages(expressions, language_class=language_class, max_size=max_size)
         )
-    languages = []
+    languages: list[Language] = []
     subsets = set()
     while len(languages) < sample_size:
         if sampling_strategy == "stratified":
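The `languages: list[Language] = []` annotation addresses a common mypy complaint: an empty literal gives the checker nothing to infer an element type from, producing errors like `Need type annotation for "languages"`. A standalone illustration with generic names, not code from the commit:

langs: list[str] = []               # without the annotation: Need type annotation
seen: set[tuple[int, ...]] = set()  # the same issue arises for empty sets

langs.append("lang-0")  # element type is now checked on every append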
@@ -134,7 +134,9 @@
             )
         if expr_indices not in subsets:
             subsets.add(expr_indices)
-            languages.append(language_class([expressions[idx] for idx in expr_indices]))
+            languages.append(
+                language_class(tuple(expressions[idx] for idx in expr_indices))
+            )
     return languages
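Passing `tuple(...)` instead of a list here matters for both typing and hashing: if `Language` declares its expressions as a `tuple[Expression, ...]` (as the frozen dataclasses later in this commit suggest), a list argument fails type-checking, and lists are unhashable in any case. A self-contained sketch using a hypothetical stand-in class, not ULTK's actual `Language`:

from dataclasses import dataclass

@dataclass(frozen=True)
class Lang:  # hypothetical stand-in for ultk's Language
    expressions: tuple[str, ...]

ok = Lang(tuple(e for e in ["red", "blue"]))  # type-checks, and hashable
hash(ok)  # fine: all fields are immutable
# Lang(["red", "blue"])  # mypy: Argument 1 has incompatible type "list[str]"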


@@ -212,7 +214,7 @@ def generate_languages(
     word_amt_sample_size = int(sample_size / lang_size)

     expressions_indices = list(range(total_word_amount))
-    languages = set()
+    languages: set[Language] = set()

     # For each language size
     for word_amount in word_amounts:
@@ -297,7 +299,7 @@ def sample_lang_size(
     id_start: int = 0,
     verbose=False,
     dummy_name="sampled_lang_id",
-) -> list[Language]:
+) -> dict[str, Any]:
    """Get a sample of languages each of exactly lang_size.

     Args:
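The corrected annotation (`dict[str, Any]` rather than `list[Language]`) matches what the function actually returns; the docstring fragment visible in the next hunk shows a dict carrying at least an `"id_start"` entry. A hedged sketch of that shape, where every key other than `id_start` is an assumption:

from typing import Any

# assumed shape; only "id_start" is confirmed by the visible docstring
result: dict[str, Any] = {
    "languages": [],  # hypothetical key for the sampled languages
    "id_start": 42,   # updated counter, per the docstring
}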
@@ -361,7 +363,7 @@ def sample_quasi_natural(
         "id_start": (updated length of languages)
     }
     """
-    languages = set()
+    languages: set[Language] = set()

     natural_indices = list(range(len(natural_terms)))
     unnatural_indices = list(range(len(unnatural_terms)))
@@ -414,10 +416,12 @@
     )

     # Sample unique languages
-    seen = set()
+    seen: set[Language] = set()
     for _ in range(degree_sample_size):
-        vocabulary = random_combination_vocabulary(
-            seen, num_natural, natural_terms, num_unnatural, unnatural_terms
-        )
+        vocabulary = tuple(
+            random_combination_vocabulary(
+                seen, num_natural, natural_terms, num_unnatural, unnatural_terms
+            )
+        )
         id_start += 1
         language = language_class(
@@ -488,9 +492,9 @@ def enumerate_all_languages(
     # Construct the languages
     for natural_subset in natural_subsets:
         for unnatural_subset in unnatural_subsets:
-            vocabulary = [natural_terms[idx] for idx in natural_subset] + [
+            vocabulary = tuple(natural_terms[idx] for idx in natural_subset) + tuple(
                 unnatural_terms[idx] for idx in unnatural_subset
-            ]
+            )
             id_start += 1
             language = language_class(vocabulary, name=rename_id(dummy_name, id_start))
             languages.add(language)
@@ -503,7 +507,7 @@ def random_combination_vocabulary(
     natural_terms: list[Expression],
     num_unnatural: int = 0,
     unnatural_terms: list[Expression] = [],
-) -> list[Language]:
+) -> list[Expression]:
    """Get a single vocabulary for a specific language size by choosing a random combination of natural and unnatural terms.

     Args:
@@ -524,7 +528,7 @@
     nat_sample_indices = tuple(
         sorted(random.sample(range(len(natural_terms)), num_natural))
     )
-    unnat_sample_indices = ()
+    unnat_sample_indices: tuple[int, ...] = tuple()
     if unnatural_terms:
         unnat_sample_indices = tuple(
             sorted(random.sample(range(len(unnatural_terms)), num_unnatural))
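The last change in this file is another inference fix: from a bare `()`, mypy infers the empty-tuple type, so the later reassignment to a non-empty `tuple[int, ...]` is flagged as incompatible; annotating the variable up front gives it the wider type. A standalone illustration, not the commit's code:

import random

unnat: tuple[int, ...] = tuple()  # without the annotation, the next line errors
if random.random() > 0.5:
    unnat = tuple(sorted(random.sample(range(10), 3)))  # tuple[int, ...]: OK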
29 changes: 16 additions & 13 deletions src/ultk/language/semantics.py
@@ -65,11 +65,10 @@ def __hash__(self) -> int:

 @dataclass(frozen=True)
 class Universe:
-
     """The universe is the collection of possible referent objects for a meaning."""

-    referents: tuple[Referent]
-    prior: tuple[float] = None
+    referents: tuple[Referent, ...]
+    prior: tuple[float, ...] = tuple()

     @cached_property
     def _referents_by_name(self):
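The `tuple[Referent, ...]` spelling is the substantive fix here: `tuple[Referent]` declares a tuple of exactly one element, while `tuple[Referent, ...]` declares a homogeneous tuple of any length, which is what a universe of referents needs. A quick illustration with plain ints:

one: tuple[int] = (1,)             # exactly one element
many: tuple[int, ...] = (1, 2, 3)  # any length, all ints
# bad: tuple[int] = (1, 2)         # mypy: expected a 1-tuple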
Expand Down Expand Up @@ -110,7 +109,7 @@ def from_dataframe(cls, df: pd.DataFrame):
Args:
a DataFrame representing the meaning space of interest, assumed to have a column `name`
"""
prior = None
prior: tuple[float, ...] = tuple()
if "probability" in df.columns:
prior = tuple(df["probability"])
records = df.to_dict("records")
@@ -128,9 +127,9 @@ def from_csv(cls, filename: str):

 @dataclass(frozen=True)
 class Meaning:
-    referents: tuple[Referent]
+    referents: tuple[Referent, ...]
     universe: Universe
-    _dist: tuple[float] = None
+    _dist: tuple[float, ...] = tuple()
     """A meaning picks out a set of objects from the universe.

     Following one tradition (from formal semantics), we might model an underspecified meaning as a subset of the universe.
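Replacing the `None` defaults with `tuple()` serves two purposes: the field type no longer needs to be `Optional`, and because tuples are immutable, an empty tuple is a legal dataclass default (a list default would raise `ValueError: mutable default` at class creation). A sketch with a hypothetical class, not ULTK's `Meaning`:

from dataclasses import dataclass

@dataclass(frozen=True)
class M:  # hypothetical stand-in for Meaning
    weights: tuple[float, ...] = tuple()  # immutable default: allowed
    # weights: list[float] = []           # would raise ValueError at class creation

m = M()
print(m.weights)  # ()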
@@ -164,17 +163,21 @@ def dist(self) -> tuple:
             # normalize weights to distribution
             total_weight = sum(self._dist)
             return tuple(
-                self._dist[self.referents.index(self.universe.referents[idx])]
-                / total_weight
-                if self.universe.referents[idx] in self.referents
-                else 0
+                (
+                    self._dist[self.referents.index(self.universe.referents[idx])]
+                    / total_weight
+                    if self.universe.referents[idx] in self.referents
+                    else 0
+                )
                 for idx in range(len(self.universe.referents))
             )
         else:
             return tuple(
-                1 / len(self.referents)
-                if self.universe.referents[idx] in self.referents
-                else 0
+                (
+                    1 / len(self.referents)
+                    if self.universe.referents[idx] in self.referents
+                    else 0
+                )
                 for idx in range(len(self.universe.referents))
             )
