Merge pull request #359 from pymc-labs/summary_pymc

pymc-labs · Jun 19, 2024 · 9330a9c
2 parents 2916688 + 267d7c7
commit 9330a9c
Show file tree

Hide file tree

Showing 11 changed files with 572 additions and 715 deletions.
diff --git a/causalpy/pymc_experiments.py b/causalpy/pymc_experiments.py
@@ -102,29 +102,37 @@ def print_coefficients(self, round_to=None) -> None:
         ...                 "progressbar": False
         ...             }),
         ...  )
-        >>> result.print_coefficients(round_to=1) # doctest: +NUMBER
+        >>> result.print_coefficients(round_to=1)
         Model coefficients:
-        Intercept                     1, 94% HDI [1, 1]
-        post_treatment[T.True]        1, 94% HDI [0.9, 1]
-        group                         0.2, 94% HDI [0.09, 0.2]
-        group:post_treatment[T.True]  0.5, 94% HDI [0.4, 0.6]
-        sigma                         0.08, 94% HDI [0.07, 0.1]
+            Intercept                     1, 94% HDI [1, 1]
+            post_treatment[T.True]        1, 94% HDI [0.9, 1]
+            group                         0.2, 94% HDI [0.09, 0.2]
+            group:post_treatment[T.True]  0.5, 94% HDI [0.4, 0.6]
+            sigma                         0.08, 94% HDI [0.07, 0.1]
         """
+
+        def print_row(
+            max_label_length: int, name: str, coeff_samples: xr.DataArray, round_to: int
+        ) -> None:
+            """Print one row of the coefficient table"""
+            formatted_name = f"  {name: <{max_label_length}}"
+            formatted_val = f"{round_num(coeff_samples.mean().data, round_to)}, 94% HDI [{round_num(coeff_samples.quantile(0.03).data, round_to)}, {round_num(coeff_samples.quantile(1-0.03).data, round_to)}]"  # noqa: E501
+            print(f"  {formatted_name}  {formatted_val}")
+
         print("Model coefficients:")
         coeffs = az.extract(self.idata.posterior, var_names="beta")
-        # Note: f"{name: <30}" pads the name with spaces so that we have alignment of
-        # the stats despite variable names of different lengths
+
+        # Determine the width of the longest label
+        max_label_length = max(len(name) for name in self.labels + ["sigma"])
+
         for name in self.labels:
             coeff_samples = coeffs.sel(coeffs=name)
-            print(
-                f"{name: <30}{round_num(coeff_samples.mean().data, round_to)}, 94% HDI [{round_num(coeff_samples.quantile(0.03).data, round_to)}, {round_num(coeff_samples.quantile(1-0.03).data, round_to)}]"  # noqa: E501
-            )
-        # add coeff for measurement std
+            print_row(max_label_length, name, coeff_samples, round_to)
+
+        # Add coefficient for measurement std
         coeff_samples = az.extract(self.model.idata.posterior, var_names="sigma")
         name = "sigma"
-        print(
-            f"{name: <30}{round_num(coeff_samples.mean().data, round_to)}, 94% HDI [{round_num(coeff_samples.quantile(0.03).data, round_to)}, {round_num(coeff_samples.quantile(1-0.03).data, round_to)}]"  # noqa: E501
-        )
+        print_row(max_label_length, name, coeff_samples, round_to)
 
 
 class PrePostFit(ExperimentalDesign, PrePostFitDataValidator):
@@ -160,13 +168,13 @@ class PrePostFit(ExperimentalDesign, PrePostFitDataValidator):
     ...         }
     ...     ),
     ... )
-    >>> result.summary(round_to=1) # doctest: +NUMBER
+    >>> result.summary(round_to=1)
     ==================================Pre-Post Fit==================================
     Formula: actual ~ 0 + a + g
     Model coefficients:
-    a                             0.6, 94% HDI [0.6, 0.6]
-    g                             0.4, 94% HDI [0.4, 0.4]
-    sigma                         0.8, 94% HDI [0.6, 0.9]
+        a      0.6, 94% HDI [0.6, 0.6]
+        g      0.4, 94% HDI [0.4, 0.4]
+        sigma  0.8, 94% HDI [0.6, 0.9]
     """
 
     def __init__(
@@ -1181,10 +1189,10 @@ class PrePostNEGD(ExperimentalDesign, PrePostNEGDDataValidator):
     Results:
     Causal impact = 2, $CI_{94%}$[2, 2]
     Model coefficients:
-    Intercept                     -0.5, 94% HDI [-1, 0.2]
-    C(group)[T.1]                 2, 94% HDI [2, 2]
-    pre                           1, 94% HDI [1, 1]
-    sigma                         0.5, 94% HDI [0.5, 0.6]
+        Intercept      -0.5, 94% HDI [-1, 0.2]
+        C(group)[T.1]  2, 94% HDI [2, 2]
+        pre            1, 94% HDI [1, 1]
+        sigma          0.5, 94% HDI [0.5, 0.6]
     """
 
     def __init__(

diff --git a/causalpy/tests/test_integration_pymc_examples.py b/causalpy/tests/test_integration_pymc_examples.py
@@ -43,6 +43,7 @@ def test_did():
     assert isinstance(result, cp.pymc_experiments.DifferenceInDifferences)
     assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
     assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
+    result.summary()
 
 
 # TODO: set up fixture for the banks dataset
@@ -98,6 +99,7 @@ def test_did_banks_simple():
     assert isinstance(result, cp.pymc_experiments.DifferenceInDifferences)
     assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
     assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
+    result.summary()
 
 
 @pytest.mark.integration
@@ -149,6 +151,7 @@ def test_did_banks_multi():
     assert isinstance(result, cp.pymc_experiments.DifferenceInDifferences)
     assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
     assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
+    result.summary()
 
 
 @pytest.mark.integration
@@ -174,6 +177,7 @@ def test_rd():
     assert isinstance(result, cp.pymc_experiments.RegressionDiscontinuity)
     assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
     assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
+    result.summary()
 
 
 @pytest.mark.integration
@@ -200,6 +204,7 @@ def test_rd_bandwidth():
     assert isinstance(result, cp.pymc_experiments.RegressionDiscontinuity)
     assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
     assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
+    result.summary()
 
 
 @pytest.mark.integration
@@ -229,6 +234,7 @@ def test_rd_drinking():
     assert isinstance(result, cp.pymc_experiments.RegressionDiscontinuity)
     assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
     assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
+    result.summary()
 
 
 def setup_regression_kink_data(kink):
@@ -281,6 +287,7 @@ def test_rkink():
     assert isinstance(result, cp.pymc_experiments.RegressionKink)
     assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
     assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
+    result.summary()
 
 
 @pytest.mark.integration
@@ -307,6 +314,7 @@ def test_rkink_bandwidth():
     assert isinstance(result, cp.pymc_experiments.RegressionKink)
     assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
     assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
+    result.summary()
 
 
 @pytest.mark.integration
@@ -336,6 +344,7 @@ def test_its():
     assert isinstance(result, cp.pymc_experiments.SyntheticControl)
     assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
     assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
+    result.summary()
 
 
 @pytest.mark.integration
@@ -366,6 +375,7 @@ def test_its_covid():
     assert isinstance(result, cp.pymc_experiments.InterruptedTimeSeries)
     assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
     assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
+    result.summary()
 
 
 @pytest.mark.integration
@@ -392,6 +402,7 @@ def test_sc():
     assert isinstance(result, cp.pymc_experiments.SyntheticControl)
     assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
     assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
+    result.summary()
 
 
 @pytest.mark.integration
@@ -430,6 +441,7 @@ def test_sc_brexit():
     assert isinstance(result, cp.pymc_experiments.SyntheticControl)
     assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
     assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
+    result.summary()
 
 
 @pytest.mark.integration
@@ -455,6 +467,7 @@ def test_ancova():
     assert isinstance(result, cp.pymc_experiments.PrePostNEGD)
     assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
     assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
+    result.summary()
 
 
 @pytest.mark.integration
@@ -485,6 +498,7 @@ def test_geolift1():
     assert isinstance(result, cp.pymc_experiments.SyntheticControl)
     assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
     assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
+    result.summary()
 
 
 @pytest.mark.integration

diff --git a/docs/source/notebooks/ancova_pymc.ipynb b/docs/source/notebooks/ancova_pymc.ipynb
diff --git a/docs/source/notebooks/did_pymc.ipynb b/docs/source/notebooks/did_pymc.ipynb
diff --git a/docs/source/notebooks/did_pymc_banks.ipynb b/docs/source/notebooks/did_pymc_banks.ipynb
diff --git a/docs/source/notebooks/its_pymc.ipynb b/docs/source/notebooks/its_pymc.ipynb
diff --git a/docs/source/notebooks/rd_pymc.ipynb b/docs/source/notebooks/rd_pymc.ipynb
diff --git a/docs/source/notebooks/rd_pymc_drinking.ipynb b/docs/source/notebooks/rd_pymc_drinking.ipynb
diff --git a/docs/source/notebooks/rkink_pymc.ipynb b/docs/source/notebooks/rkink_pymc.ipynb
diff --git a/docs/source/notebooks/sc_pymc.ipynb b/docs/source/notebooks/sc_pymc.ipynb
diff --git a/docs/source/notebooks/sc_pymc_brexit.ipynb b/docs/source/notebooks/sc_pymc_brexit.ipynb