Add support for optimizing by KPI instead of revenue.
PiperOrigin-RevId: 704413113
lukmaz authored and The Meridian Authors committed Jan 21, 2025
1 parent 7757c4b commit f855bb0
Showing 7 changed files with 139 additions and 28 deletions.
13 changes: 9 additions & 4 deletions meridian/analysis/analyzer.py
@@ -3464,6 +3464,7 @@ def optimal_freq(
self,
freq_grid: Sequence[float] | None = None,
use_posterior: bool = True,
use_kpi: bool = False,
selected_geos: Sequence[str | int] | None = None,
selected_times: Sequence[str | int] | None = None,
confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
@@ -3476,15 +3477,18 @@ def optimal_freq(
number of impressions remains unchanged as frequency varies. Meridian solves
for the frequency at which posterior mean ROI is optimized.
Note: The ROI numerator is revenue if `revenue_per_kpi` is defined or if
`kpi_type == 'revenue'`. Otherwise, the ROI numerator is KPI units.
Note: The ROI numerator is revenue if `use_kpi` is `False`; otherwise, the
ROI numerator is KPI units.
Args:
freq_grid: List of frequency values. The ROI of each channel is calculated
for each frequency value in the list. By default, the list includes
numbers from `1.0` to the maximum frequency in increments of `0.1`.
use_posterior: Boolean. If `True`, posterior optimal frequencies are
generated. If `False`, prior optimal frequencies are generated.
use_kpi: Boolean. If `True`, the counterfactual metrics are calculated
using KPI. If `False`, the counterfactual metrics are calculated using
revenue.
selected_geos: Optional list containing a subset of geos to include. By
default, all geos are included.
selected_times: Optional list containing a subset of times to include. By
@@ -3521,7 +3525,6 @@ def optimal_freq(
ValueError: If there are no channels with reach and frequency data.
"""
dist_type = constants.POSTERIOR if use_posterior else constants.PRIOR
use_kpi = self._meridian.input_data.revenue_per_kpi is None
if self._meridian.n_rf_channels == 0:
raise ValueError(
"Must have at least one channel with reach and frequency data."
@@ -4011,7 +4014,9 @@ def response_curves(
self._meridian.rf_tensors.frequency
) * tf.convert_to_tensor(
self.optimal_freq(
selected_geos=selected_geos, selected_times=selected_times
selected_geos=selected_geos,
selected_times=selected_times,
use_kpi=use_kpi,
).optimal_frequency,
dtype=tf.float32,
)
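With this change, the choice between KPI and revenue in `optimal_freq` is made explicitly by the caller rather than inferred from `revenue_per_kpi`. A minimal usage sketch, assuming an already-fitted Meridian model `mmm` and the public `Analyzer` wrapper (names outside this diff are illustrative):

    from meridian.analysis import analyzer

    an = analyzer.Analyzer(mmm)  # `mmm`: a fitted Meridian model (assumed to exist)

    # Counterfactual metrics computed on KPI units.
    kpi_result = an.optimal_freq(use_kpi=True)

    # Default behavior: counterfactual metrics computed on revenue.
    revenue_result = an.optimal_freq()

    print(kpi_result.optimal_frequency)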
1 change: 1 addition & 0 deletions meridian/analysis/analyzer_test.py
@@ -3636,6 +3636,7 @@ def test_optimal_frequency_data_no_revenue_per_kpi_correct(self):
freq_grid=[1.0, 2.0, 3.0],
confidence_level=constants.DEFAULT_CONFIDENCE_LEVEL,
use_posterior=True,
use_kpi=True,
)
expected = xr.Dataset(
coords={
46 changes: 32 additions & 14 deletions meridian/analysis/optimizer.py
@@ -111,7 +111,7 @@ def template_env(self) -> jinja2.Environment:
def _kpi_or_revenue(self) -> str:
return (
c.REVENUE
if self.meridian.input_data.revenue_per_kpi is not None
if self.nonoptimized_data.attrs[c.IS_REVENUE_KPI]
else c.KPI.upper()
)

@@ -905,6 +905,7 @@ def optimize(
target_mroi: float | None = None,
gtol: float = 0.0001,
use_optimal_frequency: bool = True,
use_kpi: bool = False,
confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
batch_size: int = c.DEFAULT_BATCH_SIZE,
) -> OptimizationResults:
@@ -960,6 +961,7 @@ def optimize(
use_optimal_frequency: If `True`, uses `optimal_frequency` calculated by
trained Meridian model for optimization. If `False`, uses historical
frequency.
use_kpi: If `True`, uses KPI instead of revenue as the outcome metric.
confidence_level: The threshold for computing the confidence intervals.
batch_size: Maximum draws per chain in each batch. The calculation is run
in batches to avoid memory exhaustion. If a memory error occurs, try
@@ -1004,7 +1006,9 @@ def optimize(
if self._meridian.n_rf_channels > 0 and use_optimal_frequency:
optimal_frequency = tf.convert_to_tensor(
self._analyzer.optimal_freq(
use_posterior=use_posterior, selected_times=selected_time_dims
use_posterior=use_posterior,
selected_times=selected_time_dims,
use_kpi=use_kpi,
).optimal_frequency,
dtype=tf.float32,
)
@@ -1027,6 +1031,7 @@ def optimize(
step_size=step_size,
selected_times=selected_time_dims,
use_posterior=use_posterior,
use_kpi=use_kpi,
optimal_frequency=optimal_frequency,
batch_size=batch_size,
)
@@ -1049,6 +1054,7 @@ def optimize(

nonoptimized_data = self._create_budget_dataset(
use_posterior=use_posterior,
use_kpi=use_kpi,
hist_spend=hist_spend,
spend=rounded_spend,
selected_times=selected_time_dims,
@@ -1058,6 +1064,7 @@ def optimize(
)
nonoptimized_data_with_optimal_freq = self._create_budget_dataset(
use_posterior=use_posterior,
use_kpi=use_kpi,
hist_spend=hist_spend,
spend=rounded_spend,
selected_times=selected_time_dims,
@@ -1068,6 +1075,7 @@ def optimize(
)
optimized_data = self._create_budget_dataset(
use_posterior=use_posterior,
use_kpi=use_kpi,
hist_spend=hist_spend,
spend=optimal_spend,
selected_times=selected_time_dims,
@@ -1321,6 +1329,7 @@ def _create_budget_dataset(
hist_spend: np.ndarray,
spend: np.ndarray,
use_posterior: bool = True,
use_kpi: bool = False,
selected_times: Sequence[str] | None = None,
optimal_frequency: Sequence[float] | None = None,
attrs: Mapping[str, Any] | None = None,
@@ -1336,7 +1345,6 @@ def _create_budget_dataset(
hist_spend, spend, optimal_frequency
)
)
kpi_only = self._meridian.revenue_per_kpi is None
budget = np.sum(spend)
all_times = self._meridian.input_data.time.values.tolist()

@@ -1350,7 +1358,7 @@ def _create_budget_dataset(
frequency=new_frequency,
),
selected_times=selected_times,
use_kpi=kpi_only,
use_kpi=use_kpi,
batch_size=batch_size,
include_non_paid_channels=False,
)
@@ -1378,7 +1386,7 @@ def _create_budget_dataset(
frequency=new_frequency,
),
selected_times=selected_times,
use_kpi=kpi_only,
use_kpi=use_kpi,
batch_size=batch_size,
)
mean_expected_outcome = tf.reduce_mean(expected_outcome, (0, 1)) # a scalar
@@ -1425,7 +1433,7 @@ def _create_budget_dataset(
selected_times=selected_times,
batch_size=batch_size,
by_reach=True,
use_kpi=kpi_only,
use_kpi=use_kpi,
),
confidence_level=confidence_level,
include_median=True,
@@ -1471,7 +1479,9 @@ def _create_budget_dataset(
c.TOTAL_INCREMENTAL_OUTCOME: total_incremental_outcome,
c.TOTAL_ROI: total_incremental_outcome / budget,
c.TOTAL_CPIK: total_cpik,
c.IS_REVENUE_KPI: not kpi_only,
c.IS_REVENUE_KPI: (
self._meridian.input_data.kpi_type == c.REVENUE or not use_kpi
),
c.CONFIDENCE_LEVEL: confidence_level,
c.USE_HISTORICAL_BUDGET: use_historical_budget,
}
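The attribute above ties `IS_REVENUE_KPI` to both the input data's `kpi_type` and the caller's `use_kpi` choice. A small sketch of the resulting logic, assuming `c.REVENUE` is the string `'revenue'` (constant values are not shown in this diff):

    # Mirrors `kpi_type == c.REVENUE or not use_kpi` from the attrs dict above.
    def is_revenue_kpi(kpi_type: str, use_kpi: bool) -> bool:
        return kpi_type == 'revenue' or not use_kpi

    # A revenue-type KPI is always reported as revenue; otherwise the label
    # follows the caller's choice.
    assert is_revenue_kpi('revenue', use_kpi=True) is True
    assert is_revenue_kpi('non_revenue', use_kpi=True) is False
    assert is_revenue_kpi('non_revenue', use_kpi=False) is True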
@@ -1547,6 +1557,7 @@ def _update_incremental_outcome_grid(
multipliers_grid: tf.Tensor,
selected_times: Sequence[str],
use_posterior: bool = True,
use_kpi: bool = False,
optimal_frequency: xr.DataArray | None = None,
batch_size: int = c.DEFAULT_BATCH_SIZE,
):
@@ -1564,6 +1575,9 @@ def _update_incremental_outcome_grid(
use_posterior: Boolean. If `True`, then the incremental outcome is derived
from the posterior distribution of the model. Otherwise, the prior
distribution is used.
use_kpi: Boolean. If `True`, then the incremental outcome is derived from
the KPI impact. Otherwise, the incremental outcome is derived from the
revenue impact.
optimal_frequency: xr.DataArray with dimension `n_rf_channels`, containing
the optimal frequency per channel that maximizes posterior mean ROI.
Value is `None` if the model does not contain reach and frequency data,
@@ -1605,7 +1619,6 @@ def _update_incremental_outcome_grid(
# incremental_outcome returns a three dimensional tensor with dims
# (n_chains x n_draws x n_total_channels). Incremental_outcome_grid requires
# incremental outcome by channel.
use_kpi = self._meridian.revenue_per_kpi is None
incremental_outcome_grid[i, :] = np.mean(
self._analyzer.incremental_outcome(
use_posterior=use_posterior,
@@ -1631,6 +1644,7 @@ def _create_grids(
step_size: int,
selected_times: Sequence[str],
use_posterior: bool = True,
use_kpi: bool = False,
optimal_frequency: xr.DataArray | None = None,
batch_size: int = c.DEFAULT_BATCH_SIZE,
) -> tuple[np.ndarray, np.ndarray]:
@@ -1649,6 +1663,9 @@ def _create_grids(
use_posterior: Boolean. If `True`, then the incremental outcome is derived
from the posterior distribution of the model. Otherwise, the prior
distribution is used.
use_kpi: Boolean. If `True`, then the incremental outcome is derived from
the KPI impact. Otherwise, the incremental outcome is derived from the
revenue impact.
optimal_frequency: xr.DataArray with dimension `n_rf_channels`, containing
the optimal frequency per channel that maximizes posterior mean ROI.
Value is `None` if the model does not contain reach and frequency data,
@@ -1688,12 +1705,13 @@ def _create_grids(
)
for i in range(n_grid_rows):
self._update_incremental_outcome_grid(
i,
incremental_outcome_grid,
multipliers_grid,
selected_times,
use_posterior,
optimal_frequency,
i=i,
incremental_outcome_grid=incremental_outcome_grid,
multipliers_grid=multipliers_grid,
selected_times=selected_times,
use_posterior=use_posterior,
use_kpi=use_kpi,
optimal_frequency=optimal_frequency,
batch_size=batch_size,
)
# In theory, for RF channels, incremental_outcome/spend should always be
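Downstream, `BudgetOptimizer.optimize` threads the same flag into `optimal_freq`, `_create_grids`, and each `_create_budget_dataset` call. A hedged end-to-end sketch, assuming a fitted model `mmm` as in the tests below (the commented error text matches the new test at the end of this diff):

    from meridian.analysis import optimizer

    budget_optimizer = optimizer.BudgetOptimizer(mmm)  # `mmm`: fitted model (assumed)

    # Optimize the budget allocation with KPI units as the outcome metric.
    results = budget_optimizer.optimize(use_kpi=True)

    # The summary datasets carry ROI/CPIK metrics plus the IS_REVENUE_KPI
    # attribute set in `_create_budget_dataset` above.
    print(results.optimized_data)

    # For a non-revenue KPI without `revenue_per_kpi`, requesting revenue-based
    # optimization is expected to raise:
    #   ValueError: Revenue analysis is not available when `revenue_per_kpi` is
    #   unknown. Set `use_kpi=True` to perform KPI analysis instead.
    # budget_optimizer.optimize(use_kpi=False)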
71 changes: 68 additions & 3 deletions meridian/analysis/optimizer_test.py
@@ -3263,6 +3263,17 @@ def setUp(self):
seed=0,
)
)
self.input_data_non_revenue_revenue_per_kpi = (
data_test_utils.sample_input_data_non_revenue_revenue_per_kpi(
n_geos=_N_GEOS,
n_times=_N_TIMES,
n_media_times=_N_MEDIA_TIMES,
n_controls=_N_CONTROLS,
n_media_channels=_N_MEDIA_CHANNELS,
n_rf_channels=_N_RF_CHANNELS,
seed=0,
)
)
custom_model_spec = spec.ModelSpec(
prior=prior_distribution.PriorDistribution(
knot_values=tfp.distributions.Normal(0.0, 5.0, name=c.KNOT_VALUES),
@@ -3282,9 +3293,15 @@ def setUp(self):
input_data=self.input_data_media_and_rf_kpi,
model_spec=custom_model_spec,
)
self.meridian_non_revenue_revenue_per_kpi = model.Meridian(
input_data=self.input_data_non_revenue_revenue_per_kpi,
)
self.budget_optimizer_media_and_rf_kpi = optimizer.BudgetOptimizer(
self.meridian_media_and_rf_kpi
)
self.budget_optimizer_non_revenue_revenue_per_kpi = (
optimizer.BudgetOptimizer(self.meridian_non_revenue_revenue_per_kpi)
)
self.enter_context(
mock.patch.object(
model.Meridian,
@@ -3319,7 +3336,9 @@ def test_incremental_outcome_called_correct_optimize(
)
)

self.budget_optimizer_media_and_rf_kpi.optimize(use_posterior=use_posterior)
self.budget_optimizer_media_and_rf_kpi.optimize(
use_posterior=use_posterior, use_kpi=True
)

mock_incremental_outcome.assert_called_with(
# marginal roi computation in the analyzer transitively calls
@@ -3351,7 +3370,9 @@ def test_expected_outcome_called_correct_optimize(self, use_posterior: bool):
)),
)
)
self.budget_optimizer_media_and_rf_kpi.optimize(use_posterior=use_posterior)
self.budget_optimizer_media_and_rf_kpi.optimize(
use_posterior=use_posterior, use_kpi=True
)
mock_expected_outcome.assert_called_with(
use_posterior=use_posterior,
new_data=mock.ANY,
@@ -3361,7 +3382,9 @@ def test_expected_outcome_called_correct_optimize(self, use_posterior: bool):
)

def test_results_kpi_only(self):
optimization_results = self.budget_optimizer_media_and_rf_kpi.optimize()
optimization_results = self.budget_optimizer_media_and_rf_kpi.optimize(
use_kpi=True
)
for var in (c.ROI, c.MROI, c.CPIK, c.EFFECTIVENESS):
self.assertIsNotNone(optimization_results.optimized_data[var])
self.assertIsNotNone(optimization_results.nonoptimized_data[var])
@@ -3386,6 +3409,48 @@ def test_results_kpi_only(self):
]
)

@parameterized.parameters([True, False])
def test_use_kpi_non_revenue_revenue_per_kpi(self, use_kpi: bool):
optimization_results = (
self.budget_optimizer_non_revenue_revenue_per_kpi.optimize(
use_kpi=use_kpi
)
)

for var in (c.ROI, c.MROI, c.CPIK, c.EFFECTIVENESS):
self.assertIsNotNone(optimization_results.optimized_data[var])
self.assertIsNotNone(optimization_results.nonoptimized_data[var])
self.assertIsNotNone(
optimization_results.nonoptimized_data_with_optimal_freq[var]
)
for attr in (c.TOTAL_ROI, c.TOTAL_CPIK):
self.assertIsNotNone(optimization_results.optimized_data.attrs[attr])
self.assertIsNotNone(optimization_results.nonoptimized_data.attrs[attr])
self.assertIsNotNone(
optimization_results.nonoptimized_data_with_optimal_freq.attrs[attr]
)
self.assertEqual(
optimization_results.optimized_data.attrs[c.IS_REVENUE_KPI], not use_kpi
)
self.assertEqual(
optimization_results.nonoptimized_data.attrs[c.IS_REVENUE_KPI],
not use_kpi,
)
self.assertEqual(
optimization_results.nonoptimized_data_with_optimal_freq.attrs[
c.IS_REVENUE_KPI
],
not use_kpi,
)

def test_optimize_no_use_kpi_no_revenue_per_kpi_raises_error(self):
with self.assertRaisesWithLiteralMatch(
ValueError,
'Revenue analysis is not available when `revenue_per_kpi` is unknown.'
' Set `use_kpi=True` to perform KPI analysis instead.',
):
self.budget_optimizer_media_and_rf_kpi.optimize(use_kpi=False)


if __name__ == '__main__':
absltest.main()