Add support for optimizing by KPI instead of revenue.
PiperOrigin-RevId: 704413113
lukmaz authored and The Meridian Authors committed Jan 21, 2025
1 parent 7757c4b commit f855bb0
Showing 7 changed files with 139 additions and 28 deletions.
13 changes: 9 additions & 4 deletions meridian/analysis/analyzer.py
@@ -3464,6 +3464,7 @@ def optimal_freq(
self,
freq_grid: Sequence[float] | None = None,
use_posterior: bool = True,
use_kpi: bool = False,
selected_geos: Sequence[str | int] | None = None,
selected_times: Sequence[str | int] | None = None,
confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
@@ -3476,15 +3477,18 @@ def optimal_freq(
number of impressions remains unchanged as frequency varies. Meridian solves
for the frequency at which posterior mean ROI is optimized.
Note: The ROI numerator is revenue if `revenue_per_kpi` is defined or if
`kpi_type == 'revenue'`. Otherwise, the ROI numerator is KPI units.
Note: The ROI numerator is revenue if `use_kpi` is `False`; otherwise, the
ROI numerator is KPI units.
Args:
freq_grid: List of frequency values. The ROI of each channel is calculated
for each frequency value in the list. By default, the list includes
numbers from `1.0` to the maximum frequency in increments of `0.1`.
use_posterior: Boolean. If `True`, posterior optimal frequencies are
generated. If `False`, prior optimal frequencies are generated.
use_kpi: Boolean. If `True`, the counterfactual metrics are calculated
using KPI. If `False`, the counterfactual metrics are calculated using
revenue.
selected_geos: Optional list containing a subset of geos to include. By
default, all geos are included.
selected_times: Optional list containing a subset of times to include. By
@@ -3521,7 +3525,6 @@ def optimal_freq(
ValueError: If there are no channels with reach and frequency data.
"""
dist_type = constants.POSTERIOR if use_posterior else constants.PRIOR
use_kpi = self._meridian.input_data.revenue_per_kpi is None
if self._meridian.n_rf_channels == 0:
raise ValueError(
"Must have at least one channel with reach and frequency data."
@@ -4011,7 +4014,9 @@ def response_curves(
self._meridian.rf_tensors.frequency
) * tf.convert_to_tensor(
self.optimal_freq(
selected_geos=selected_geos, selected_times=selected_times
selected_geos=selected_geos,
selected_times=selected_times,
use_kpi=use_kpi,
).optimal_frequency,
dtype=tf.float32,
)
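With this change, the choice between KPI and revenue in `optimal_freq` is made explicitly by the caller rather than inferred from `revenue_per_kpi`. A minimal usage sketch, assuming an already-fitted Meridian model `mmm` and the public `Analyzer` wrapper (names outside this diff are illustrative):

    from meridian.analysis import analyzer

    an = analyzer.Analyzer(mmm)  # `mmm`: a fitted Meridian model (assumed to exist)

    # Counterfactual metrics computed on KPI units.
    kpi_result = an.optimal_freq(use_kpi=True)

    # Default behavior: counterfactual metrics computed on revenue.
    revenue_result = an.optimal_freq()

    print(kpi_result.optimal_frequency)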
1 change: 1 addition & 0 deletions meridian/analysis/analyzer_test.py
@@ -3636,6 +3636,7 @@ def test_optimal_frequency_data_no_revenue_per_kpi_correct(self):
freq_grid=[1.0, 2.0, 3.0],
confidence_level=constants.DEFAULT_CONFIDENCE_LEVEL,
use_posterior=True,
use_kpi=True,
)
expected = xr.Dataset(
coords={
46 changes: 32 additions & 14 deletions meridian/analysis/optimizer.py
@@ -111,7 +111,7 @@ def template_env(self) -> jinja2.Environment:
def _kpi_or_revenue(self) -> str:
return (
c.REVENUE
if self.meridian.input_data.revenue_per_kpi is not None
if self.nonoptimized_data.attrs[c.IS_REVENUE_KPI]
else c.KPI.upper()
)

@@ -905,6 +905,7 @@ def optimize(
target_mroi: float | None = None,
gtol: float = 0.0001,
use_optimal_frequency: bool = True,
use_kpi: bool = False,
confidence_level: float = c.DEFAULT_CONFIDENCE_LEVEL,
batch_size: int = c.DEFAULT_BATCH_SIZE,
) -> OptimizationResults:
@@ -960,6 +961,7 @@ def optimize(
use_optimal_frequency: If `True`, uses `optimal_frequency` calculated by
trained Meridian model for optimization. If `False`, uses historical
frequency.
use_kpi: If `True`, uses KPI instead of revenue as the outcome metric.
confidence_level: The threshold for computing the confidence intervals.
batch_size: Maximum draws per chain in each batch. The calculation is run
in batches to avoid memory exhaustion. If a memory error occurs, try
@@ -1004,7 +1006,9 @@ def optimize(
if self._meridian.n_rf_channels > 0 and use_optimal_frequency:
optimal_frequency = tf.convert_to_tensor(
self._analyzer.optimal_freq(
use_posterior=use_posterior, selected_times=selected_time_dims
use_posterior=use_posterior,
selected_times=selected_time_dims,
use_kpi=use_kpi,
).optimal_frequency,
dtype=tf.float32,
)
@@ -1027,6 +1031,7 @@ def optimize(
step_size=step_size,
selected_times=selected_time_dims,
use_posterior=use_posterior,
use_kpi=use_kpi,
optimal_frequency=optimal_frequency,
batch_size=batch_size,
)
@@ -1049,6 +1054,7 @@ def optimize(

nonoptimized_data = self._create_budget_dataset(
use_posterior=use_posterior,
use_kpi=use_kpi,
hist_spend=hist_spend,
spend=rounded_spend,
selected_times=selected_time_dims,
@@ -1058,6 +1064,7 @@ def optimize(
)
nonoptimized_data_with_optimal_freq = self._create_budget_dataset(
use_posterior=use_posterior,
use_kpi=use_kpi,
hist_spend=hist_spend,
spend=rounded_spend,
selected_times=selected_time_dims,
@@ -1068,6 +1075,7 @@ def optimize(
)
optimized_data = self._create_budget_dataset(
use_posterior=use_posterior,
use_kpi=use_kpi,
hist_spend=hist_spend,
spend=optimal_spend,
selected_times=selected_time_dims,
@@ -1321,6 +1329,7 @@ def _create_budget_dataset(
hist_spend: np.ndarray,
spend: np.ndarray,
use_posterior: bool = True,
use_kpi: bool = False,
selected_times: Sequence[str] | None = None,
optimal_frequency: Sequence[float] | None = None,
attrs: Mapping[str, Any] | None = None,
@@ -1336,7 +1345,6 @@ def _create_budget_dataset(
hist_spend, spend, optimal_frequency
)
)
kpi_only = self._meridian.revenue_per_kpi is None
budget = np.sum(spend)
all_times = self._meridian.input_data.time.values.tolist()

@@ -1350,7 +1358,7 @@ def _create_budget_dataset(
frequency=new_frequency,
),
selected_times=selected_times,
use_kpi=kpi_only,
use_kpi=use_kpi,
batch_size=batch_size,
include_non_paid_channels=False,
)
@@ -1378,7 +1386,7 @@ def _create_budget_dataset(
frequency=new_frequency,
),
selected_times=selected_times,
use_kpi=kpi_only,
use_kpi=use_kpi,
batch_size=batch_size,
)
mean_expected_outcome = tf.reduce_mean(expected_outcome, (0, 1)) # a scalar
@@ -1425,7 +1433,7 @@ def _create_budget_dataset(
selected_times=selected_times,
batch_size=batch_size,
by_reach=True,
use_kpi=kpi_only,
use_kpi=use_kpi,
),
confidence_level=confidence_level,
include_median=True,
@@ -1471,7 +1479,9 @@ def _create_budget_dataset(
c.TOTAL_INCREMENTAL_OUTCOME: total_incremental_outcome,
c.TOTAL_ROI: total_incremental_outcome / budget,
c.TOTAL_CPIK: total_cpik,
c.IS_REVENUE_KPI: not kpi_only,
c.IS_REVENUE_KPI: (
self._meridian.input_data.kpi_type == c.REVENUE or not use_kpi
),
c.CONFIDENCE_LEVEL: confidence_level,
c.USE_HISTORICAL_BUDGET: use_historical_budget,
}
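The attribute above ties `IS_REVENUE_KPI` to both the input data's `kpi_type` and the caller's `use_kpi` choice. A small sketch of the resulting logic, assuming `c.REVENUE` is the string `'revenue'` (constant values are not shown in this diff):

    # Mirrors `kpi_type == c.REVENUE or not use_kpi` from the attrs dict above.
    def is_revenue_kpi(kpi_type: str, use_kpi: bool) -> bool:
        return kpi_type == 'revenue' or not use_kpi

    # A revenue-type KPI is always reported as revenue; otherwise the label
    # follows the caller's choice.
    assert is_revenue_kpi('revenue', use_kpi=True) is True
    assert is_revenue_kpi('non_revenue', use_kpi=True) is False
    assert is_revenue_kpi('non_revenue', use_kpi=False) is True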
@@ -1547,6 +1557,7 @@ def _update_incremental_outcome_grid(
multipliers_grid: tf.Tensor,
selected_times: Sequence[str],
use_posterior: bool = True,
use_kpi: bool = False,
optimal_frequency: xr.DataArray | None = None,
batch_size: int = c.DEFAULT_BATCH_SIZE,
):
@@ -1564,6 +1575,9 @@ def _update_incremental_outcome_grid(
use_posterior: Boolean. If `True`, then the incremental outcome is derived
from the posterior distribution of the model. Otherwise, the prior
distribution is used.
use_kpi: Boolean. If `True`, then the incremental outcome is derived from
the KPI impact. Otherwise, the incremental outcome is derived from the
revenue impact.
optimal_frequency: xr.DataArray with dimension `n_rf_channels`, containing
the optimal frequency per channel that maximizes posterior mean ROI.
Value is `None` if the model does not contain reach and frequency data,
@@ -1605,7 +1619,6 @@ def _update_incremental_outcome_grid(
# incremental_outcome returns a three dimensional tensor with dims
# (n_chains x n_draws x n_total_channels). Incremental_outcome_grid requires
# incremental outcome by channel.
use_kpi = self._meridian.revenue_per_kpi is None
incremental_outcome_grid[i, :] = np.mean(
self._analyzer.incremental_outcome(
use_posterior=use_posterior,
@@ -1631,6 +1644,7 @@ def _create_grids(
step_size: int,
selected_times: Sequence[str],
use_posterior: bool = True,
use_kpi: bool = False,
optimal_frequency: xr.DataArray | None = None,
batch_size: int = c.DEFAULT_BATCH_SIZE,
) -> tuple[np.ndarray, np.ndarray]:
@@ -1649,6 +1663,9 @@ def _create_grids(
use_posterior: Boolean. If `True`, then the incremental outcome is derived
from the posterior distribution of the model. Otherwise, the prior
distribution is used.
use_kpi: Boolean. If `True`, then the incremental outcome is derived from
the KPI impact. Otherwise, the incremental outcome is derived from the
revenue impact.
optimal_frequency: xr.DataArray with dimension `n_rf_channels`, containing
the optimal frequency per channel that maximizes posterior mean ROI.
Value is `None` if the model does not contain reach and frequency data,
@@ -1688,12 +1705,13 @@ def _create_grids(
)
for i in range(n_grid_rows):
self._update_incremental_outcome_grid(
i,
incremental_outcome_grid,
multipliers_grid,
selected_times,
use_posterior,
optimal_frequency,
i=i,
incremental_outcome_grid=incremental_outcome_grid,
multipliers_grid=multipliers_grid,
selected_times=selected_times,
use_posterior=use_posterior,
use_kpi=use_kpi,
optimal_frequency=optimal_frequency,
batch_size=batch_size,
)
# In theory, for RF channels, incremental_outcome/spend should always be
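Downstream, `BudgetOptimizer.optimize` threads the same flag into `optimal_freq`, `_create_grids`, and each `_create_budget_dataset` call. A hedged end-to-end sketch, assuming a fitted model `mmm` as in the tests below (the commented error text matches the new test at the end of this diff):

    from meridian.analysis import optimizer

    budget_optimizer = optimizer.BudgetOptimizer(mmm)  # `mmm`: fitted model (assumed)

    # Optimize the budget allocation with KPI units as the outcome metric.
    results = budget_optimizer.optimize(use_kpi=True)

    # The summary datasets carry ROI/CPIK metrics plus the IS_REVENUE_KPI
    # attribute set in `_create_budget_dataset` above.
    print(results.optimized_data)

    # For a non-revenue KPI without `revenue_per_kpi`, requesting revenue-based
    # optimization is expected to raise:
    #   ValueError: Revenue analysis is not available when `revenue_per_kpi` is
    #   unknown. Set `use_kpi=True` to perform KPI analysis instead.
    # budget_optimizer.optimize(use_kpi=False)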
71 changes: 68 additions & 3 deletions meridian/analysis/optimizer_test.py
@@ -3263,6 +3263,17 @@ def setUp(self):
seed=0,
)
)
self.input_data_non_revenue_revenue_per_kpi = (
data_test_utils.sample_input_data_non_revenue_revenue_per_kpi(
n_geos=_N_GEOS,
n_times=_N_TIMES,
n_media_times=_N_MEDIA_TIMES,
n_controls=_N_CONTROLS,
n_media_channels=_N_MEDIA_CHANNELS,
n_rf_channels=_N_RF_CHANNELS,
seed=0,
)
)
custom_model_spec = spec.ModelSpec(
prior=prior_distribution.PriorDistribution(
knot_values=tfp.distributions.Normal(0.0, 5.0, name=c.KNOT_VALUES),
@@ -3282,9 +3293,15 @@ def setUp(self):
input_data=self.input_data_media_and_rf_kpi,
model_spec=custom_model_spec,
)
self.meridian_non_revenue_revenue_per_kpi = model.Meridian(
input_data=self.input_data_non_revenue_revenue_per_kpi,
)
self.budget_optimizer_media_and_rf_kpi = optimizer.BudgetOptimizer(
self.meridian_media_and_rf_kpi
)
self.budget_optimizer_non_revenue_revenue_per_kpi = (
optimizer.BudgetOptimizer(self.meridian_non_revenue_revenue_per_kpi)
)
self.enter_context(
mock.patch.object(
model.Meridian,
@@ -3319,7 +3336,9 @@ def test_incremental_outcome_called_correct_optimize(
)
)

self.budget_optimizer_media_and_rf_kpi.optimize(use_posterior=use_posterior)
self.budget_optimizer_media_and_rf_kpi.optimize(
use_posterior=use_posterior, use_kpi=True
)

mock_incremental_outcome.assert_called_with(
# marginal roi computation in the analyzer transitively calls
@@ -3351,7 +3370,9 @@ def test_expected_outcome_called_correct_optimize(self, use_posterior: bool):
)),
)
)
self.budget_optimizer_media_and_rf_kpi.optimize(use_posterior=use_posterior)
self.budget_optimizer_media_and_rf_kpi.optimize(
use_posterior=use_posterior, use_kpi=True
)
mock_expected_outcome.assert_called_with(
use_posterior=use_posterior,
new_data=mock.ANY,
@@ -3361,7 +3382,9 @@ def test_expected_outcome_called_correct_optimize(self, use_posterior: bool):
)

def test_results_kpi_only(self):
optimization_results = self.budget_optimizer_media_and_rf_kpi.optimize()
optimization_results = self.budget_optimizer_media_and_rf_kpi.optimize(
use_kpi=True
)
for var in (c.ROI, c.MROI, c.CPIK, c.EFFECTIVENESS):
self.assertIsNotNone(optimization_results.optimized_data[var])
self.assertIsNotNone(optimization_results.nonoptimized_data[var])
@@ -3386,6 +3409,48 @@ def test_results_kpi_only(self):
]
)

@parameterized.parameters([True, False])
def test_use_kpi_non_revenue_revenue_per_kpi(self, use_kpi: bool):
optimization_results = (
self.budget_optimizer_non_revenue_revenue_per_kpi.optimize(
use_kpi=use_kpi
)
)

for var in (c.ROI, c.MROI, c.CPIK, c.EFFECTIVENESS):
self.assertIsNotNone(optimization_results.optimized_data[var])
self.assertIsNotNone(optimization_results.nonoptimized_data[var])
self.assertIsNotNone(
optimization_results.nonoptimized_data_with_optimal_freq[var]
)
for attr in (c.TOTAL_ROI, c.TOTAL_CPIK):
self.assertIsNotNone(optimization_results.optimized_data.attrs[attr])
self.assertIsNotNone(optimization_results.nonoptimized_data.attrs[attr])
self.assertIsNotNone(
optimization_results.nonoptimized_data_with_optimal_freq.attrs[attr]
)
self.assertEqual(
optimization_results.optimized_data.attrs[c.IS_REVENUE_KPI], not use_kpi
)
self.assertEqual(
optimization_results.nonoptimized_data.attrs[c.IS_REVENUE_KPI],
not use_kpi,
)
self.assertEqual(
optimization_results.nonoptimized_data_with_optimal_freq.attrs[
c.IS_REVENUE_KPI
],
not use_kpi,
)

def test_optimize_no_use_kpi_no_revenue_per_kpi_raises_error(self):
with self.assertRaisesWithLiteralMatch(
ValueError,
'Revenue analysis is not available when `revenue_per_kpi` is unknown.'
' Set `use_kpi=True` to perform KPI analysis instead.',
):
self.budget_optimizer_media_and_rf_kpi.optimize(use_kpi=False)


if __name__ == '__main__':
absltest.main()