Skip to content

Commit

Permalink
Merge pull request #224 from pymc-labs/rd-updates
Browse files Browse the repository at this point in the history
Add an `epsilon` parameter to `RegressionDiscontinuity` classes
  • Loading branch information
drbenvincent authored Jul 24, 2023
2 parents 21944bd + 694caab commit 90cf94b
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 21 deletions.
32 changes: 19 additions & 13 deletions causalpy/pymc_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,18 +543,19 @@ class RegressionDiscontinuity(ExperimentalDesign):
"""
A class to analyse regression discontinuity experiments.
:param data: A pandas dataframe
:param formula: A statistical model formula
:param treatment_threshold: A scalar threshold value at which the treatment
is applied
:param model: A PyMC model
:param running_variable_name: The name of the predictor variable that the treatment
threshold is based upon
.. note::
There is no pre/post intervention data distinction for the regression
discontinuity design, we fit all the data available.
:param data:
A pandas dataframe
:param formula:
A statistical model formula
:param treatment_threshold:
A scalar threshold value at which the treatment is applied
:param model:
A PyMC model
:param running_variable_name:
The name of the predictor variable that the treatment threshold is based upon
:param epsilon:
A small scalar value which determines how far above and below the treatment
threshold to evaluate the causal impact.
"""

def __init__(
Expand All @@ -564,6 +565,7 @@ def __init__(
treatment_threshold: float,
model=None,
running_variable_name: str = "x",
epsilon: float = 0.001,
**kwargs,
):
super().__init__(model=model, **kwargs)
Expand All @@ -572,6 +574,7 @@ def __init__(
self.formula = formula
self.running_variable_name = running_variable_name
self.treatment_threshold = treatment_threshold
self.epsilon = epsilon
self._input_validation()

y, X = dmatrices(formula, self.data)
Expand Down Expand Up @@ -609,7 +612,10 @@ def __init__(
self.x_discon = pd.DataFrame(
{
self.running_variable_name: np.array(
[self.treatment_threshold - 0.001, self.treatment_threshold + 0.001]
[
self.treatment_threshold - self.epsilon,
self.treatment_threshold + self.epsilon,
]
),
"treated": np.array([0, 1]),
}
Expand Down
29 changes: 21 additions & 8 deletions causalpy/skl_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,13 +346,21 @@ def plot(self):

class RegressionDiscontinuity(ExperimentalDesign):
"""
Analyse data from regression discontinuity experiments.
.. note::
There is no pre/post intervention data distinction for the regression
discontinuity design, we fit all the data available.
A class to analyse regression discontinuity experiments.
:param data:
A pandas dataframe
:param formula:
A statistical model formula
:param treatment_threshold:
A scalar threshold value at which the treatment is applied
:param model:
A scikit-learn model object
:param running_variable_name:
The name of the predictor variable that the treatment threshold is based upon
:param epsilon:
A small scalar value which determines how far above and below the treatment
threshold to evaluate the causal impact.
"""

def __init__(
Expand All @@ -362,13 +370,15 @@ def __init__(
treatment_threshold,
model=None,
running_variable_name="x",
epsilon: float = 0.001,
**kwargs,
):
super().__init__(model=model, **kwargs)
self.data = data
self.formula = formula
self.running_variable_name = running_variable_name
self.treatment_threshold = treatment_threshold
self.epsilon = epsilon
y, X = dmatrices(formula, self.data)
self._y_design_info = y.design_info
self._x_design_info = X.design_info
Expand Down Expand Up @@ -404,7 +414,10 @@ def __init__(
self.x_discon = pd.DataFrame(
{
self.running_variable_name: np.array(
[self.treatment_threshold - 0.001, self.treatment_threshold + 0.001]
[
self.treatment_threshold - self.epsilon,
self.treatment_threshold + self.epsilon,
]
),
"treated": np.array([0, 1]),
}
Expand Down
1 change: 1 addition & 0 deletions causalpy/tests/test_integration_pymc_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def test_rd():
formula="y ~ 1 + bs(x, df=6) + treated",
model=cp.pymc_models.LinearRegression(sample_kwargs=sample_kwargs),
treatment_threshold=0.5,
epsilon=0.001,
)
assert isinstance(df, pd.DataFrame)
assert isinstance(result, cp.pymc_experiments.RegressionDiscontinuity)
Expand Down
4 changes: 4 additions & 0 deletions causalpy/tests/test_integration_skl_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def test_rd_drinking():
running_variable_name="age",
model=LinearRegression(),
treatment_threshold=21,
epsilon=0.001,
)
assert isinstance(df, pd.DataFrame)
assert isinstance(result, cp.skl_experiments.RegressionDiscontinuity)
Expand Down Expand Up @@ -81,6 +82,7 @@ def test_rd_linear_main_effects():
formula="y ~ 1 + x + treated",
model=LinearRegression(),
treatment_threshold=0.5,
epsilon=0.001,
)
assert isinstance(data, pd.DataFrame)
assert isinstance(result, cp.skl_experiments.RegressionDiscontinuity)
Expand All @@ -94,6 +96,7 @@ def test_rd_linear_with_interaction():
formula="y ~ 1 + x + treated + x:treated",
model=LinearRegression(),
treatment_threshold=0.5,
epsilon=0.001,
)
assert isinstance(data, pd.DataFrame)
assert isinstance(result, cp.skl_experiments.RegressionDiscontinuity)
Expand All @@ -108,6 +111,7 @@ def test_rd_linear_with_gaussian_process():
formula="y ~ 1 + x + treated",
model=GaussianProcessRegressor(kernel=kernel),
treatment_threshold=0.5,
epsilon=0.001,
)
assert isinstance(data, pd.DataFrame)
assert isinstance(result, cp.skl_experiments.RegressionDiscontinuity)

0 comments on commit 90cf94b

Please sign in to comment.