Skip to content

Commit

Permalink
Separate and save lineage for all levels (#179)
Browse files Browse the repository at this point in the history
* Separate and save lineage for all levels

* Debug

* Debug
  • Loading branch information
dachengx authored Jul 31, 2024
1 parent 105859b commit 099e9c6
Show file tree
Hide file tree
Showing 7 changed files with 161 additions and 114 deletions.
62 changes: 36 additions & 26 deletions appletree/component.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from warnings import warn
from functools import partial
from typing import Tuple, List, Dict, Optional, Union, Set
Expand Down Expand Up @@ -183,9 +184,13 @@ def compile(self):
pass

@property
def lineage_hash(self):
def lineage(self):
raise NotImplementedError

@property
def lineage_hash(self):
return deterministic_hash(self.lineage)


@export
class ComponentSim(Component):
Expand Down Expand Up @@ -564,24 +569,26 @@ def new_component(self, llh_name: Optional[str] = None, pass_binning: bool = Tru
return component

@property
def lineage_hash(self):
return deterministic_hash(
{
**{
"rate_name": self.rate_name,
"norm_type": self.norm_type,
"bins": self.bins,
"bins_type": self.bins_type,
"code": self.code,
},
**dict(
def lineage(self):
return {
**{
"rate_name": self.rate_name,
"norm_type": self.norm_type,
"bins": (
tuple(b.tolist() for b in self.bins) if self.bins is not None else self.bins
),
"bins_type": self.bins_type,
"code": self.code,
},
**{
"instances": dict(
zip(
self.instances,
[_cached_functions[self.llh_name][p].lineage_hash for p in self.instances],
[_cached_functions[self.llh_name][p].lineage for p in self.instances],
)
),
}
)
)
},
}


@export
Expand Down Expand Up @@ -627,16 +634,19 @@ def simulate_weighted_data(self, parameters, *args, **kwargs):
return result

@property
def lineage_hash(self):
return deterministic_hash(
{
"rate_name": self.rate_name,
"norm_type": self.norm_type,
"bins": self.bins,
"bins_type": self.bins_type,
"file_name": calculate_sha256(get_file_path(self._file_name)),
}
)
def lineage(self):
return {
"rate_name": self.rate_name,
"norm_type": self.norm_type,
"bins": tuple(b.tolist() for b in self.bins) if self.bins is not None else self.bins,
"bins_type": self.bins_type,
"file_path": (
os.path.basename(self._file_name)
if not utils.FULL_PATH_LINEAGE
else get_file_path(self._file_name)
),
"sha256": calculate_sha256(get_file_path(self._file_name)),
}


@export
Expand Down
69 changes: 35 additions & 34 deletions appletree/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import numpy as np
from strax import deterministic_hash

from appletree import utils
from appletree.share import _cached_configs
from appletree.utils import (
exporter,
Expand Down Expand Up @@ -112,9 +113,13 @@ def required_parameter(self, llh_name=None):
return None

@property
def lineage_hash(self):
def lineage(self):
raise NotImplementedError

@property
def lineage_hash(self):
return deterministic_hash(self.lineage)


@export
class Constant(Config):
Expand Down Expand Up @@ -145,13 +150,11 @@ def build(self, llh_name: Optional[str] = None):
self.value = value

@property
def lineage_hash(self):
return deterministic_hash(
{
"llh_name": self.llh_name,
"value": self.value,
}
)
def lineage(self):
return {
"llh_name": self.llh_name,
"value": self.value,
}


@export
Expand Down Expand Up @@ -338,15 +341,17 @@ def pdf_to_cdf(self, x, pdf):
return x, cdf

@property
def lineage_hash(self):
return deterministic_hash(
{
"llh_name": self.llh_name,
"method": self.method,
"file_path": os.path.basename(self.file_path),
"sha256": calculate_sha256(get_file_path(self.file_path)),
}
)
def lineage(self):
return {
"llh_name": self.llh_name,
"method": self.method,
"file_path": (
os.path.basename(self.file_path)
if not utils.FULL_PATH_LINEAGE
else get_file_path(self.file_path)
),
"sha256": calculate_sha256(get_file_path(self.file_path)),
}


@export
Expand Down Expand Up @@ -500,16 +505,14 @@ def apply(self, pos, parameters):
return median + add

@property
def lineage_hash(self):
return deterministic_hash(
{
"llh_name": self.llh_name,
"method": self.method,
"median": self.median.lineage_hash,
"lower": self.lower.lineage_hash,
"upper": self.upper.lineage_hash,
}
)
def lineage(self):
return {
"llh_name": self.llh_name,
"method": self.method,
"median": self.median.lineage,
"lower": self.lower.lineage,
"upper": self.upper.lineage,
}


@export
Expand Down Expand Up @@ -559,10 +562,8 @@ def _sanity_check(self):
assert np.all(np.isclose(volumes, volumes[0])), mesg

@property
def lineage_hash(self):
return deterministic_hash(
{
"llh_name": self.llh_name,
"value": self.value,
}
)
def lineage(self):
return {
"llh_name": self.llh_name,
"value": self.value,
}
42 changes: 25 additions & 17 deletions appletree/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import appletree as apt
from appletree import randgen
from appletree import Parameter
from appletree.utils import load_json, get_file_path
from appletree.utils import JSON_OPTIONS, load_json, get_file_path
from appletree.share import _cached_configs, set_global_config

os.environ["OMP_NUM_THREADS"] = "1"
Expand Down Expand Up @@ -303,19 +303,23 @@ def _dump_meta(self, batch_size, metadata=None):
if self.backend_h5 is not None:
name = self.sampler.backend.name
with h5py.File(self.backend_h5, "r+") as opt:
opt[name].attrs["metadata"] = json.dumps(metadata)
opt[name].attrs["metadata"] = json.dumps(metadata, **JSON_OPTIONS)
# parameters prior configuration
opt[name].attrs["par_config"] = json.dumps(self.par_manager.par_config)
opt[name].attrs["par_config"] = json.dumps(
self.par_manager.par_config, **JSON_OPTIONS
)
# max posterior parameters
opt[name].attrs["post_parameters"] = json.dumps(self.get_post_parameters())
opt[name].attrs["post_parameters"] = json.dumps(
self.get_post_parameters(), **JSON_OPTIONS
)
# the order of parameters saved in backend
opt[name].attrs["parameter_fit"] = self.par_manager.parameter_fit
# instructions
opt[name].attrs["instruct"] = json.dumps(self.instruct)
opt[name].attrs["instruct"] = json.dumps(self.instruct, **JSON_OPTIONS)
# configs
opt[name].attrs["config"] = json.dumps(self.config)
opt[name].attrs["config"] = json.dumps(self.config, **JSON_OPTIONS)
# configurations, maybe users will manually add some maps
opt[name].attrs["_cached_configs"] = json.dumps(_cached_configs)
opt[name].attrs["_cached_configs"] = json.dumps(_cached_configs, **JSON_OPTIONS)
# batch size
opt[name].attrs["batch_size"] = batch_size

Expand Down Expand Up @@ -392,16 +396,20 @@ def update_parameter_config(self, likelihoods):
return needed_parameters

@property
def lineage_hash(self):
return deterministic_hash(
{
**self.instruct,
**self.par_config,
**dict(
def lineage(self):
return {
**self.instruct,
**{"par_config": self.par_config},
**{
"likelihoods": dict(
zip(
self.likelihoods.keys(),
[v.lineage_hash for v in self.likelihoods.values()],
[v.lineage for v in self.likelihoods.values()],
)
),
}
)
)
},
}

@property
def lineage_hash(self):
return deterministic_hash(self.lineage)
58 changes: 35 additions & 23 deletions appletree/likelihood.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from warnings import warn
from typing import Type, Dict, Set, Optional, cast
import inspect
Expand All @@ -7,6 +8,7 @@
from scipy.stats import norm
from strax import deterministic_hash

from appletree import utils
from appletree import randgen
from appletree.hist import make_hist_mesh_grid, make_hist_irreg_bin_1d, make_hist_irreg_bin_2d
from appletree.utils import (
Expand Down Expand Up @@ -151,7 +153,7 @@ def set_binning(self, config):
clip=config["clip"],
which_np=np,
)
self._bins = [self._bins]
self._bins = (self._bins,)
self.data_hist = make_hist_irreg_bin_1d(
self.data[:, 0],
bins=self._bins[0],
Expand Down Expand Up @@ -228,6 +230,7 @@ def set_binning(self, config):
)
else:
raise ValueError("'bins_type' should either be meshgrid, equiprob or irreg")
assert isinstance(self._bins, tuple), "bins should be tuple after setting binning!"

def register_component(
self, component_cls: Type[Component], component_name: str, file_name: Optional[str] = None
Expand Down Expand Up @@ -410,21 +413,30 @@ def print_likelihood_summary(self, indent: str = " " * 4, short: bool = True):
print("-" * 40)

@property
def lineage_hash(self):
return deterministic_hash(
{
**{
"config": self._config,
"sha256": calculate_sha256(get_file_path(self._data_file_name)),
},
**dict(
def lineage(self):
return {
**{
"config": self._config,
"file_path": (
os.path.basename(self._data_file_name)
if not utils.FULL_PATH_LINEAGE
else get_file_path(self._data_file_name)
),
"sha256": calculate_sha256(get_file_path(self._data_file_name)),
},
**{
"components": dict(
zip(
self.components.keys(),
[v.lineage_hash for v in self.components.values()],
[v.lineage for v in self.components.values()],
)
),
}
)
)
},
}

@property
def lineage_hash(self):
return deterministic_hash(self.lineage)


class LikelihoodLit(Likelihood):
Expand Down Expand Up @@ -577,17 +589,17 @@ def print_likelihood_summary(self, indent: str = " " * 4, short: bool = True):
print("-" * 40)

@property
def lineage_hash(self):
return deterministic_hash(
{
**{
"config": self._config,
},
**dict(
def lineage(self):
return {
**{
"config": self._config,
},
**{
"components": dict(
zip(
self.components.keys(),
[v.lineage_hash for v in self.components.values()],
)
),
}
)
)
},
}
Loading

0 comments on commit 099e9c6

Please sign in to comment.