Merge pull request #189 from lincc-frameworks/Issue/180-Errors_sf

Dedicated error estimate in structure function
lincc-frameworks · Aug 7, 2023 · d166744 · d166744
2 parents 95df58c + 24dd70d
commit d166744
Show file tree

Hide file tree

Showing 5 changed files with 54 additions and 38 deletions.
diff --git a/src/tape/analysis/structure_function/base_argument_container.py b/src/tape/analysis/structure_function/base_argument_container.py
@@ -61,10 +61,15 @@ class StructureFunctionArgumentContainer:
             If a value is not provided here, then the default number of
             lightcurve samples will be equal to the least number of differences
             in the available lightcurves. By default None.
+        estimate_err: `bool`, optional
+            Specifies if the structure function errors are to be estimated,
+            via bootstraping the sample and taking note of 16 and 84 percentile
+            of the resulting distribution
         calculation_repetitions: `int`, optional
             Specifies the number of times to repeat the structure function
             calculation. Typically this would be used when setting
-            `equally_weight_lightcurves = True`. By default 1.
+            `estimate_err = True`. By default 1 when not estimating errors,
+            and 100 when estimating errors.
         lower_error_quantile: `float`, optional
             When calculation_repetitions > 1 we will calculate the
             `lower_error_quantile` and `upper_error_quantile` quantiles of the
@@ -101,7 +106,11 @@ class StructureFunctionArgumentContainer:
     random_seed: int = None
     equally_weight_lightcurves: bool = False
     number_lightcurve_samples: int = None
-    calculation_repetitions: int = 1
+    estimate_err: bool = False
+    if estimate_err:
+        calculation_repetitions: int = 100
+    else:
+        calculation_repetitions: int = 1
     lower_error_quantile: float = 0.16
     upper_error_quantile: float = 0.84
     report_upper_lower_error_separately: bool = False

diff --git a/src/tape/analysis/structure_function/base_calculator.py b/src/tape/analysis/structure_function/base_calculator.py
@@ -25,6 +25,7 @@ def __init__(
         self._bins = argument_container.bins  # defaults to None
         self._binning_method = argument_container.bin_method
         self._bin_count_target = argument_container.bin_count_target
+        self._equally_weight_lightcurves = argument_container.equally_weight_lightcurves
         self._dts = []
         self._all_d_fluxes = []
         self._sum_error_squared = []
@@ -36,17 +37,16 @@ def calculate(self):
         """Abstract method that must be implemented by the child class."""
         raise (NotImplementedError, "Must be implemented by the child class")
 
-    def _equally_weight_lightcurves(self, random_generator=None):
-        """This method reduces the number of difference samples for all light
-        curves to prevent a few from dominating the calculation.
-        """
+    def _bootstrap(self, random_generator=None):
+        """This method creates the boostraped samples of difference values"""
         self._get_difference_values_per_lightcurve()
 
-        # if the user defined number_lightcurve_samples in the argument container,
-        # use that, otherwise, default to the minimum number of difference values.
-        least_lightcurve_differences = self._argument_container.number_lightcurve_samples or min(
-            self._difference_values_per_lightcurve
-        )
+        # if the user defined equal weight in the argument container,
+        # use that, otherwise, go to specified number of difference values.
+        if self._equally_weight_lightcurves is True:
+            least_lightcurve_differences = min(self._difference_values_per_lightcurve)
+        else:
+            least_lightcurve_differences = self._argument_container.number_lightcurve_samples
 
         for lc in self._lightcurves:
             lc.select_difference_samples(least_lightcurve_differences, random_generator=random_generator)
@@ -194,7 +194,8 @@ def _calculate_binned_statistics(self, sample_values=None, statistic_to_apply="m
                         )
                     except AttributeError:
                         raise AttributeError(
-                            "Length of self._lightcurves[lc_idx].sample_d_times array must equal length of corresponding sample_value array."
+                            "Length of self._lightcurves[lc_idx].sample_d_times array \
+                                must equal length of corresponding sample_value array."
                         )
 
                     sfs_all.append(sfs)

diff --git a/src/tape/analysis/structure_function/sf_light_curve.py b/src/tape/analysis/structure_function/sf_light_curve.py
@@ -49,7 +49,7 @@ def _calculate_differences(self):
         self.sample_sum_squared_error = self._all_sum_squared_error
 
     def select_difference_samples(
-        self, number_of_samples: int, random_generator: Optional[np.random.Generator] = None
+        self, number_of_samples: Optional[int] = None, random_generator: Optional[np.random.Generator] = None
     ):
         """Take a random sample of time and flux differences and the sum of squared
         errors. The samples are selected without replacement. The resulting
@@ -58,9 +58,10 @@ def select_difference_samples(
 
         Parameters
         ----------
-        number_of_samples : int
+        number_of_samples : int, optional
             Defines the number of samples to be randomly selected from the total
-            number of difference values.
+            number of difference values. If not specified, take all of the
+            avaliable values
         random_generator: np.random.Generator, optional
             A Numpy random.Generator to sample the lightcurve difference. This
             allows for repeatable random samples to be selected. By default None.
@@ -73,6 +74,9 @@ def select_difference_samples(
 
         """
 
+        if number_of_samples is None:
+            number_of_samples = self.number_of_difference_values
+
         if number_of_samples > self.number_of_difference_values:
             raise ValueError(
                 f"Requesting {number_of_samples} samples, but only "

diff --git a/src/tape/analysis/structurefunction2.py b/src/tape/analysis/structurefunction2.py
@@ -108,8 +108,8 @@ def calc_sf2(time, flux, err=None, band=None, lc_id=None, sf_method="basic", arg
                 aggregated_sfs: List[np.ndarray] = []
                 rng = np.random.default_rng(argument_container.random_seed)
                 for _ in range(argument_container.calculation_repetitions):
-                    if argument_container.equally_weight_lightcurves:
-                        sf_calculator._equally_weight_lightcurves(random_generator=rng)
+                    if argument_container.estimate_err:
+                        sf_calculator._bootstrap(random_generator=rng)
 
                     tmp_dts, tmp_sfs = sf_calculator.calculate()
                     aggregated_dts.append(tmp_dts)

diff --git a/tests/tape_tests/test_analysis.py b/tests/tape_tests/test_analysis.py
@@ -646,16 +646,15 @@ def test_sf2_provide_bins_in_argument_container():
 
 def test_sf2_with_random_sampling_one_lightcurve():
     """
-    Base case of using equal weighting passing only 1 light curve
+    Base case of passing only one light curve
     """
     lc_id = [1, 1, 1, 1, 1, 1, 1, 1]
     test_t = [1.11, 2.23, 3.45, 4.01, 5.67, 6.32, 7.88, 8.2]
     test_y = [0.11, 0.23, 0.45, 0.01, 0.67, 0.32, 0.88, 0.2]
     test_yerr = [0.1, 0.023, 0.045, 0.1, 0.067, 0.032, 0.8, 0.02]
     test_band = np.array(["r"] * len(test_y))
     test_arg_container = StructureFunctionArgumentContainer()
-    # TODO: Issue-180 will rename this
-    test_arg_container.equally_weight_lightcurves = True
+    test_arg_container.estimate_err = True
     test_arg_container.random_seed = 42
 
     res = analysis.calc_sf2(
@@ -752,10 +751,10 @@ def test_sf2_with_equal_weighting_multiple_lightcurve():
         argument_container=test_arg_container,
     )
 
-    assert res["dt"][0] == pytest.approx(3.4475, rel=0.001)
-    assert res["sf2"][0] == pytest.approx(-0.0030716, rel=0.001)
-    assert res["dt"][1] == pytest.approx(2.75786, rel=0.001)
-    assert res["sf2"][1] == pytest.approx(-0.01543, rel=0.001)
+    assert res["dt"][0] == pytest.approx(3.148214, rel=0.001)
+    assert res["sf2"][0] == pytest.approx(0.005365, rel=0.001)
+    assert res["dt"][1] == pytest.approx(2.891860, rel=0.001)
+    assert res["sf2"][1] == pytest.approx(0.048904, rel=0.001)
 
 
 def test_sf2_with_unequal_weighting_multiple_lightcurve():
@@ -846,7 +845,7 @@ def test_sf2_with_unequal_weighting_multiple_lightcurve():
     assert res["lc_id"][0] == "1"
     assert res["band"][0] == "g"
     assert res["dt"][0] == pytest.approx(1.2533, rel=0.001)
-    assert res["sf2"][0] == pytest.approx(-0.1641, rel=0.001)
+    assert res["sf2"][0] == pytest.approx(-0.164149, rel=0.001)
     assert res["lc_id"][5] == "2"
     assert res["band"][5] == "r"
     assert res["dt"][5] == pytest.approx(0.8875, rel=0.001)
@@ -935,10 +934,10 @@ def test_sf2_with_equal_weighting_multiple_lightcurve_multiple_samplings():
         argument_container=test_arg_container,
     )
 
-    assert res["dt"][0] == pytest.approx(3.1473, rel=0.001)
-    assert res["sf2"][0] == pytest.approx(0.005837, rel=0.001)
-    assert res["dt"][1] == pytest.approx(2.87768, rel=0.001)
-    assert res["sf2"][1] == pytest.approx(0.04912, rel=0.001)
+    assert res["dt"][0] == pytest.approx(3.148214, rel=0.001)
+    assert res["sf2"][0] == pytest.approx(0.005365, rel=0.001)
+    assert res["dt"][1] == pytest.approx(2.891860, rel=0.001)
+    assert res["sf2"][1] == pytest.approx(0.048904, rel=0.001)
 
 
 def test_sf2_with_equal_weighting_multiple_lightcurve_multiple_samplings_small_bins():
@@ -1024,10 +1023,10 @@ def test_sf2_with_equal_weighting_multiple_lightcurve_multiple_samplings_small_b
         argument_container=test_arg_container,
     )
 
-    assert res["dt"][0] == pytest.approx(0.64267, rel=0.001)
-    assert res["sf2"][0] == pytest.approx(0.04018, rel=0.001)
-    assert res["dt"][7] == pytest.approx(0.6095, rel=0.001)
-    assert res["sf2"][7] == pytest.approx(0.08424, rel=0.001)
+    assert res["dt"][0] == pytest.approx(0.662500, rel=0.001)
+    assert res["sf2"][0] == pytest.approx(0.031108, rel=0.001)
+    assert res["dt"][7] == pytest.approx(0.643333, rel=0.001)
+    assert res["sf2"][7] == pytest.approx(0.070499, rel=0.001)
 
 
 def test_sf2_with_equal_weighting_multiple_lightcurve_multiple_samplings_and_combining():
@@ -1101,6 +1100,7 @@ def test_sf2_with_equal_weighting_multiple_lightcurve_multiple_samplings_and_com
     test_band = np.array(["r"] * len(test_y))
     test_arg_container = StructureFunctionArgumentContainer()
     test_arg_container.equally_weight_lightcurves = True
+    test_arg_container.estimate_err = True
     test_arg_container.random_seed = 42
     test_arg_container.calculation_repetitions = 3
     test_arg_container.bin_count_target = 4
@@ -1116,11 +1116,11 @@ def test_sf2_with_equal_weighting_multiple_lightcurve_multiple_samplings_and_com
     )
 
     assert res["dt"][0] == pytest.approx(0.55, rel=0.001)
-    assert res["sf2"][0] == pytest.approx(0.1001165, rel=0.001)
-    assert res["1_sigma"][0] == pytest.approx(0.0611, rel=0.001)
-    assert res["dt"][9] == pytest.approx(3.035, rel=0.001)
-    assert res["sf2"][9] == pytest.approx(0.0396455, rel=0.001)
-    assert res["1_sigma"][9] == pytest.approx(0.04501, rel=0.001)
+    assert res["sf2"][0] == pytest.approx(0.100116, rel=0.001)
+    assert res["1_sigma"][0] == pytest.approx(0.061128, rel=0.001)
+    assert res["dt"][9] == pytest.approx(3.035000, rel=0.001)
+    assert res["sf2"][9] == pytest.approx(0.039646, rel=0.001)
+    assert res["1_sigma"][9] == pytest.approx(0.045012, rel=0.001)
 
 
 def test_sf2_with_equal_weighting_multiple_lightcurve_multiple_samplings_and_combining_non_default_sigma():
@@ -1195,6 +1195,7 @@ def test_sf2_with_equal_weighting_multiple_lightcurve_multiple_samplings_and_com
     test_band = np.array(["r"] * len(test_y))
     test_arg_container = StructureFunctionArgumentContainer()
     test_arg_container.equally_weight_lightcurves = True
+    test_arg_container.estimate_err = True
     test_arg_container.random_seed = 42
     test_arg_container.calculation_repetitions = 3
     test_arg_container.bin_count_target = 4
@@ -1289,6 +1290,7 @@ def test_sf2_with_equal_weighting_multiple_lightcurve_multiple_samplings_small_b
     test_band = np.array(["r"] * len(test_y))
     test_arg_container = StructureFunctionArgumentContainer()
     test_arg_container.equally_weight_lightcurves = True
+    test_arg_container.estimate_err = True
     test_arg_container.random_seed = 42
     test_arg_container.calculation_repetitions = 100
     test_arg_container.bin_count_target = 4