Skip to content
This repository has been archived by the owner on Jan 14, 2025. It is now read-only.

Commit

Permalink
Merge pull request #189 from lincc-frameworks/Issue/180-Errors_sf
Browse files Browse the repository at this point in the history
Dedicated error estimate in structure function
  • Loading branch information
nevencaplar authored Aug 7, 2023
2 parents 95df58c + 24dd70d commit d166744
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 38 deletions.
13 changes: 11 additions & 2 deletions src/tape/analysis/structure_function/base_argument_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,15 @@ class StructureFunctionArgumentContainer:
If a value is not provided here, then the default number of
lightcurve samples will be equal to the least number of differences
in the available lightcurves. By default None.
estimate_err: `bool`, optional
Specifies if the structure function errors are to be estimated,
via bootstraping the sample and taking note of 16 and 84 percentile
of the resulting distribution
calculation_repetitions: `int`, optional
Specifies the number of times to repeat the structure function
calculation. Typically this would be used when setting
`equally_weight_lightcurves = True`. By default 1.
`estimate_err = True`. By default 1 when not estimating errors,
and 100 when estimating errors.
lower_error_quantile: `float`, optional
When calculation_repetitions > 1 we will calculate the
`lower_error_quantile` and `upper_error_quantile` quantiles of the
Expand Down Expand Up @@ -101,7 +106,11 @@ class StructureFunctionArgumentContainer:
random_seed: int = None
equally_weight_lightcurves: bool = False
number_lightcurve_samples: int = None
calculation_repetitions: int = 1
estimate_err: bool = False
if estimate_err:
calculation_repetitions: int = 100
else:
calculation_repetitions: int = 1
lower_error_quantile: float = 0.16
upper_error_quantile: float = 0.84
report_upper_lower_error_separately: bool = False
Expand Down
21 changes: 11 additions & 10 deletions src/tape/analysis/structure_function/base_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def __init__(
self._bins = argument_container.bins # defaults to None
self._binning_method = argument_container.bin_method
self._bin_count_target = argument_container.bin_count_target
self._equally_weight_lightcurves = argument_container.equally_weight_lightcurves
self._dts = []
self._all_d_fluxes = []
self._sum_error_squared = []
Expand All @@ -36,17 +37,16 @@ def calculate(self):
"""Abstract method that must be implemented by the child class."""
raise (NotImplementedError, "Must be implemented by the child class")

def _equally_weight_lightcurves(self, random_generator=None):
"""This method reduces the number of difference samples for all light
curves to prevent a few from dominating the calculation.
"""
def _bootstrap(self, random_generator=None):
"""This method creates the boostraped samples of difference values"""
self._get_difference_values_per_lightcurve()

# if the user defined number_lightcurve_samples in the argument container,
# use that, otherwise, default to the minimum number of difference values.
least_lightcurve_differences = self._argument_container.number_lightcurve_samples or min(
self._difference_values_per_lightcurve
)
# if the user defined equal weight in the argument container,
# use that, otherwise, go to specified number of difference values.
if self._equally_weight_lightcurves is True:
least_lightcurve_differences = min(self._difference_values_per_lightcurve)
else:
least_lightcurve_differences = self._argument_container.number_lightcurve_samples

for lc in self._lightcurves:
lc.select_difference_samples(least_lightcurve_differences, random_generator=random_generator)
Expand Down Expand Up @@ -194,7 +194,8 @@ def _calculate_binned_statistics(self, sample_values=None, statistic_to_apply="m
)
except AttributeError:
raise AttributeError(
"Length of self._lightcurves[lc_idx].sample_d_times array must equal length of corresponding sample_value array."
"Length of self._lightcurves[lc_idx].sample_d_times array \
must equal length of corresponding sample_value array."
)

sfs_all.append(sfs)
Expand Down
10 changes: 7 additions & 3 deletions src/tape/analysis/structure_function/sf_light_curve.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def _calculate_differences(self):
self.sample_sum_squared_error = self._all_sum_squared_error

def select_difference_samples(
self, number_of_samples: int, random_generator: Optional[np.random.Generator] = None
self, number_of_samples: Optional[int] = None, random_generator: Optional[np.random.Generator] = None
):
"""Take a random sample of time and flux differences and the sum of squared
errors. The samples are selected without replacement. The resulting
Expand All @@ -58,9 +58,10 @@ def select_difference_samples(
Parameters
----------
number_of_samples : int
number_of_samples : int, optional
Defines the number of samples to be randomly selected from the total
number of difference values.
number of difference values. If not specified, take all of the
avaliable values
random_generator: np.random.Generator, optional
A Numpy random.Generator to sample the lightcurve difference. This
allows for repeatable random samples to be selected. By default None.
Expand All @@ -73,6 +74,9 @@ def select_difference_samples(
"""

if number_of_samples is None:
number_of_samples = self.number_of_difference_values

if number_of_samples > self.number_of_difference_values:
raise ValueError(
f"Requesting {number_of_samples} samples, but only "
Expand Down
4 changes: 2 additions & 2 deletions src/tape/analysis/structurefunction2.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ def calc_sf2(time, flux, err=None, band=None, lc_id=None, sf_method="basic", arg
aggregated_sfs: List[np.ndarray] = []
rng = np.random.default_rng(argument_container.random_seed)
for _ in range(argument_container.calculation_repetitions):
if argument_container.equally_weight_lightcurves:
sf_calculator._equally_weight_lightcurves(random_generator=rng)
if argument_container.estimate_err:
sf_calculator._bootstrap(random_generator=rng)

tmp_dts, tmp_sfs = sf_calculator.calculate()
aggregated_dts.append(tmp_dts)
Expand Down
44 changes: 23 additions & 21 deletions tests/tape_tests/test_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,16 +646,15 @@ def test_sf2_provide_bins_in_argument_container():

def test_sf2_with_random_sampling_one_lightcurve():
"""
Base case of using equal weighting passing only 1 light curve
Base case of passing only one light curve
"""
lc_id = [1, 1, 1, 1, 1, 1, 1, 1]
test_t = [1.11, 2.23, 3.45, 4.01, 5.67, 6.32, 7.88, 8.2]
test_y = [0.11, 0.23, 0.45, 0.01, 0.67, 0.32, 0.88, 0.2]
test_yerr = [0.1, 0.023, 0.045, 0.1, 0.067, 0.032, 0.8, 0.02]
test_band = np.array(["r"] * len(test_y))
test_arg_container = StructureFunctionArgumentContainer()
# TODO: Issue-180 will rename this
test_arg_container.equally_weight_lightcurves = True
test_arg_container.estimate_err = True
test_arg_container.random_seed = 42

res = analysis.calc_sf2(
Expand Down Expand Up @@ -752,10 +751,10 @@ def test_sf2_with_equal_weighting_multiple_lightcurve():
argument_container=test_arg_container,
)

assert res["dt"][0] == pytest.approx(3.4475, rel=0.001)
assert res["sf2"][0] == pytest.approx(-0.0030716, rel=0.001)
assert res["dt"][1] == pytest.approx(2.75786, rel=0.001)
assert res["sf2"][1] == pytest.approx(-0.01543, rel=0.001)
assert res["dt"][0] == pytest.approx(3.148214, rel=0.001)
assert res["sf2"][0] == pytest.approx(0.005365, rel=0.001)
assert res["dt"][1] == pytest.approx(2.891860, rel=0.001)
assert res["sf2"][1] == pytest.approx(0.048904, rel=0.001)


def test_sf2_with_unequal_weighting_multiple_lightcurve():
Expand Down Expand Up @@ -846,7 +845,7 @@ def test_sf2_with_unequal_weighting_multiple_lightcurve():
assert res["lc_id"][0] == "1"
assert res["band"][0] == "g"
assert res["dt"][0] == pytest.approx(1.2533, rel=0.001)
assert res["sf2"][0] == pytest.approx(-0.1641, rel=0.001)
assert res["sf2"][0] == pytest.approx(-0.164149, rel=0.001)
assert res["lc_id"][5] == "2"
assert res["band"][5] == "r"
assert res["dt"][5] == pytest.approx(0.8875, rel=0.001)
Expand Down Expand Up @@ -935,10 +934,10 @@ def test_sf2_with_equal_weighting_multiple_lightcurve_multiple_samplings():
argument_container=test_arg_container,
)

assert res["dt"][0] == pytest.approx(3.1473, rel=0.001)
assert res["sf2"][0] == pytest.approx(0.005837, rel=0.001)
assert res["dt"][1] == pytest.approx(2.87768, rel=0.001)
assert res["sf2"][1] == pytest.approx(0.04912, rel=0.001)
assert res["dt"][0] == pytest.approx(3.148214, rel=0.001)
assert res["sf2"][0] == pytest.approx(0.005365, rel=0.001)
assert res["dt"][1] == pytest.approx(2.891860, rel=0.001)
assert res["sf2"][1] == pytest.approx(0.048904, rel=0.001)


def test_sf2_with_equal_weighting_multiple_lightcurve_multiple_samplings_small_bins():
Expand Down Expand Up @@ -1024,10 +1023,10 @@ def test_sf2_with_equal_weighting_multiple_lightcurve_multiple_samplings_small_b
argument_container=test_arg_container,
)

assert res["dt"][0] == pytest.approx(0.64267, rel=0.001)
assert res["sf2"][0] == pytest.approx(0.04018, rel=0.001)
assert res["dt"][7] == pytest.approx(0.6095, rel=0.001)
assert res["sf2"][7] == pytest.approx(0.08424, rel=0.001)
assert res["dt"][0] == pytest.approx(0.662500, rel=0.001)
assert res["sf2"][0] == pytest.approx(0.031108, rel=0.001)
assert res["dt"][7] == pytest.approx(0.643333, rel=0.001)
assert res["sf2"][7] == pytest.approx(0.070499, rel=0.001)


def test_sf2_with_equal_weighting_multiple_lightcurve_multiple_samplings_and_combining():
Expand Down Expand Up @@ -1101,6 +1100,7 @@ def test_sf2_with_equal_weighting_multiple_lightcurve_multiple_samplings_and_com
test_band = np.array(["r"] * len(test_y))
test_arg_container = StructureFunctionArgumentContainer()
test_arg_container.equally_weight_lightcurves = True
test_arg_container.estimate_err = True
test_arg_container.random_seed = 42
test_arg_container.calculation_repetitions = 3
test_arg_container.bin_count_target = 4
Expand All @@ -1116,11 +1116,11 @@ def test_sf2_with_equal_weighting_multiple_lightcurve_multiple_samplings_and_com
)

assert res["dt"][0] == pytest.approx(0.55, rel=0.001)
assert res["sf2"][0] == pytest.approx(0.1001165, rel=0.001)
assert res["1_sigma"][0] == pytest.approx(0.0611, rel=0.001)
assert res["dt"][9] == pytest.approx(3.035, rel=0.001)
assert res["sf2"][9] == pytest.approx(0.0396455, rel=0.001)
assert res["1_sigma"][9] == pytest.approx(0.04501, rel=0.001)
assert res["sf2"][0] == pytest.approx(0.100116, rel=0.001)
assert res["1_sigma"][0] == pytest.approx(0.061128, rel=0.001)
assert res["dt"][9] == pytest.approx(3.035000, rel=0.001)
assert res["sf2"][9] == pytest.approx(0.039646, rel=0.001)
assert res["1_sigma"][9] == pytest.approx(0.045012, rel=0.001)


def test_sf2_with_equal_weighting_multiple_lightcurve_multiple_samplings_and_combining_non_default_sigma():
Expand Down Expand Up @@ -1195,6 +1195,7 @@ def test_sf2_with_equal_weighting_multiple_lightcurve_multiple_samplings_and_com
test_band = np.array(["r"] * len(test_y))
test_arg_container = StructureFunctionArgumentContainer()
test_arg_container.equally_weight_lightcurves = True
test_arg_container.estimate_err = True
test_arg_container.random_seed = 42
test_arg_container.calculation_repetitions = 3
test_arg_container.bin_count_target = 4
Expand Down Expand Up @@ -1289,6 +1290,7 @@ def test_sf2_with_equal_weighting_multiple_lightcurve_multiple_samplings_small_b
test_band = np.array(["r"] * len(test_y))
test_arg_container = StructureFunctionArgumentContainer()
test_arg_container.equally_weight_lightcurves = True
test_arg_container.estimate_err = True
test_arg_container.random_seed = 42
test_arg_container.calculation_repetitions = 100
test_arg_container.bin_count_target = 4
Expand Down

0 comments on commit d166744

Please sign in to comment.