Add new_data argument to summary_metrics.

PiperOrigin-RevId: 715248061
google · Jan 21, 2025 · 64585da · 64585da
1 parent 7757c4b
commit 64585da
Show file tree

Hide file tree

Showing 5 changed files with 626 additions and 168 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -22,6 +22,9 @@ To release a new version (e.g. from `1.0.0` -> `2.0.0`):
 -->
 
 ## [Unreleased]
+
+## [0.17.0] - 2025-01-16
+* Add `new_data` argument to `Analyzer.summary_metrics` method.
 * Define constants for channel constraints in the optimizer.
 
 ## [0.16.0] - 2025-01-08
@@ -148,6 +151,7 @@ To release a new version (e.g. from `1.0.0` -> `2.0.0`):
 [0.14.0]: https://github.com/google/meridian/releases/tag/v0.14.0
 [0.15.0]: https://github.com/google/meridian/releases/tag/v0.15.0
 [0.16.0]: https://github.com/google/meridian/releases/tag/v0.16.0
-[Unreleased]: https://github.com/google/meridian/compare/v0.16.0...HEAD
+[0.17.0]: https://github.com/google/meridian/releases/tag/v0.17.0
+[Unreleased]: https://github.com/google/meridian/compare/v0.17.0...HEAD
 
 
diff --git a/meridian/__init__.py b/meridian/__init__.py
@@ -14,7 +14,7 @@
 
 """Meridian API."""
 
-__version__ = "0.16.0"
+__version__ = "0.17.0"
 
 
 from meridian import analysis

diff --git a/meridian/analysis/analyzer.py b/meridian/analysis/analyzer.py
@@ -739,7 +739,9 @@ def _get_scaled_data_tensors(
         `revenue_per_kpi`. If `None`, the original scaled tensors from the
         Meridian object are used. If `new_data` is provided, the output contains
         the scaled versions of the tensors in `new_data` and the original scaled
-        versions of all the remaining tensors.
+        versions of all the remaining tensors. The new tensors' dimensions must
+        match the dimensions of the corresponding original tensors from
+        `meridian.input_data`.
       include_non_paid_channels: Boolean. If `True`, organic media, organic RF
         and non-media treatments data is included in the output.
 
@@ -1146,7 +1148,9 @@ def expected_outcome(
         frequency=new_frequency))` calculates expected outcome conditional on
         the original `media`, `organic_media`, `organic_reach`,
         `organic_frequency`, `non_media_treatments` and `controls` tensors and
-        on the new given values for `reach` and `frequency` tensors.
+        on the new given values for `reach` and `frequency` tensors. The new
+        tensors' dimensions must match the dimensions of the corresponding
+        original tensors from `input_data`.
       selected_geos: Optional list containing a subset of geos to include. By
         default, all geos are included.
       selected_times: Optional list containing a subset of dates to include.
@@ -2182,9 +2186,12 @@ def marginal_roi(
       use_posterior: If `True` then the posterior distribution is calculated.
         Otherwise, the prior distribution is calculated.
       new_data: Optional. DataTensors containing `media`, `media_spend`,
-        `reach`, `frequency`, and `rf_spend` data with the same shape as
-        `meridian.input_data`. Used to compute mROI for alternative data.
-        Default uses the tensors from `meridian.input_data`.
+        `reach`, `frequency`, and `rf_spend` data. If provided, the marginal ROI
+        is calculated using the values of the tensors passed in `new_data` and
+        the original values of all the remaining tensors. The new
+        tensors' dimensions must match the dimensions of the corresponding
+        original tensors from `meridian.input_data`. If `None`, the marginal ROI
+        is calculated using the original values of all the tensors.
       selected_geos: Optional. Contains a subset of geos to include. By default,
         all geos are included.
       selected_times: Optional. Contains a subset of times to include. By
@@ -2342,9 +2349,12 @@ def roi(
       use_posterior: Boolean. If `True`, then the posterior distribution is
         calculated. Otherwise, the prior distribution is calculated.
       new_data: Optional. DataTensors containing `media`, `media_spend`,
-        `reach`, `frequency`, and `rf_spend` data with the same shape as
-        `meridian.input_data`. Used to compute ROI for alternative data. Default
-        uses the tensors from `meridian.input_data`.
+        `reach`, `frequency`, and `rf_spend` data. If provided, the ROI is
+        calculated using the values of the tensors passed in `new_data` and the
+        original values of all the remaining tensors. The new tensors'
+        dimensions must match the dimensions of the corresponding original
+        tensors from `meridian.input_data`. If `None`, the ROI is calculated
+        using the original values of all the tensors.
       selected_geos: Optional list containing a subset of geos to include. By
         default, all geos are included.
       selected_times: Optional list containing a subset of times to include. By
@@ -2453,9 +2463,12 @@ def cpik(
       use_posterior: Boolean. If `True` then the posterior distribution is
         calculated. Otherwise, the prior distribution is calculated.
       new_data: Optional. DataTensors containing `media`, `media_spend`,
-        `reach`, `frequency`, and `rf_spend` data with the same shape as
-        `meridian.input_data`. Used to compute CPIK for alternative data.
-        Default uses the tensors from `meridian.input_data`.
+        `reach`, `frequency`, and `rf_spend` data. If provided, the CPIK is
+        calculated using the values of the tensors passed in `new_data` and the
+        original values of all the remaining tensors. The new tensors'
+        dimensions must match the dimensions of the corresponding original
+        tensors from `meridian.input_data`. If `None`, the CPIK is calculated
+        using the original values of all the tensors.
       selected_geos: Optional list containing a subset of geos to include. By
         default, all geos are included.
       selected_times: Optional list containing a subset of times to include. By
@@ -2733,6 +2746,7 @@ def _calculate_baseline_expected_outcome(
   def _compute_incremental_outcome_aggregate(
       self,
       use_posterior: bool,
+      new_data: DataTensors | None = None,
       use_kpi: bool | None = None,
       include_non_paid_channels: bool = True,
       non_media_baseline_values: Sequence[str | float] | None = None,
@@ -2742,6 +2756,7 @@ def _compute_incremental_outcome_aggregate(
     use_kpi = use_kpi or self._meridian.input_data.revenue_per_kpi is None
     incremental_outcome_m = self.incremental_outcome(
         use_posterior=use_posterior,
+        new_data=new_data,
         use_kpi=use_kpi,
         include_non_paid_channels=include_non_paid_channels,
         non_media_baseline_values=non_media_baseline_values,
@@ -2758,6 +2773,7 @@ def _compute_incremental_outcome_aggregate(
 
   def summary_metrics(
       self,
+      new_data: DataTensors | None = None,
       marginal_roi_by_reach: bool = True,
       marginal_roi_incremental_increase: float = 0.01,
       selected_geos: Sequence[str] | None = None,
@@ -2776,6 +2792,12 @@ def summary_metrics(
     for the aggregate `"All Paid Channels"` channel dimension.
 
     Args:
+      new_data: Optional `DataTensors` object. If provided, the summary metrics
+        are calculated using the values of the tensors passed in `new_data` and
+        the original values of all the remaining tensors. The new tensors'
+        dimensions must match the dimensions of the corresponding original
+        tensors from `meridian.input_data`. If `None`, the summary metrics are
+        calculated using the original values of all the tensors.
       marginal_roi_by_reach: Boolean. Marginal ROI (mROI) is defined as the
         return on the next dollar spent. If this argument is `True`, the
         assumption is that the next dollar spent only impacts reach, holding
@@ -2830,6 +2852,7 @@ def summary_metrics(
     }
     batched_kwargs = {"batch_size": batch_size, **dim_kwargs}
     aggregated_impressions = self.get_aggregated_impressions(
+        new_data=new_data,
         optimal_frequency=optimal_frequency,
         include_non_paid_channels=include_non_paid_channels,
         **dim_kwargs,
@@ -2844,23 +2867,27 @@ def summary_metrics(
 
     incremental_outcome_prior = self._compute_incremental_outcome_aggregate(
         use_posterior=False,
+        new_data=new_data,
         use_kpi=use_kpi,
         include_non_paid_channels=include_non_paid_channels,
         **batched_kwargs,
     )
     incremental_outcome_posterior = self._compute_incremental_outcome_aggregate(
         use_posterior=True,
+        new_data=new_data,
         use_kpi=use_kpi,
         include_non_paid_channels=include_non_paid_channels,
         **batched_kwargs,
     )
     expected_outcome_prior = self.expected_outcome(
         use_posterior=False,
+        new_data=new_data,
         use_kpi=use_kpi,
         **batched_kwargs,
     )
     expected_outcome_posterior = self.expected_outcome(
         use_posterior=True,
+        new_data=new_data,
         use_kpi=use_kpi,
         **batched_kwargs,
     )
@@ -2969,10 +2996,13 @@ def summary_metrics(
     # If non-paid channels are not included, return the all, paid and non-paid
     # metrics.
     spend_list = []
+    new_spend_tensors = self._fill_missing_data_tensors(
+        new_data, [constants.MEDIA_SPEND, constants.RF_SPEND]
+    )
     if self._meridian.n_media_channels > 0:
-      spend_list.append(self._meridian.media_tensors.media_spend)
+      spend_list.append(new_spend_tensors.media_spend)
     if self._meridian.n_rf_channels > 0:
-      spend_list.append(self._meridian.rf_tensors.rf_spend)
+      spend_list.append(new_spend_tensors.rf_spend)
     # TODO Add support for 1-dimensional spend.
     aggregated_spend = self.filter_and_aggregate_geos_and_times(
         tensor=tf.concat(spend_list, axis=-1), **dim_kwargs
@@ -3005,6 +3035,7 @@ def summary_metrics(
         xr_coords=xr_coords_with_ci_and_distribution,
         confidence_level=confidence_level,
         spend_with_total=spend_with_total,
+        new_data=new_data,
         use_kpi=use_kpi,
         **batched_kwargs,
         # Drop mROI metric values in the Dataset's data_vars for the
@@ -3017,12 +3048,14 @@ def summary_metrics(
     cpik = self._compute_cpik_aggregate(
         incremental_kpi_prior=self._compute_incremental_outcome_aggregate(
             use_posterior=False,
+            new_data=new_data,
             use_kpi=True,
             include_non_paid_channels=False,
             **batched_kwargs,
         ),
         incremental_kpi_posterior=self._compute_incremental_outcome_aggregate(
             use_posterior=True,
+            new_data=new_data,
             use_kpi=True,
             include_non_paid_channels=False,
             **batched_kwargs,
@@ -3058,6 +3091,7 @@ def summary_metrics(
 
   def get_aggregated_impressions(
       self,
+      new_data: DataTensors | None = None,
       selected_geos: Sequence[str] | None = None,
       selected_times: Sequence[str] | None = None,
       aggregate_geos: bool = True,
@@ -3068,6 +3102,14 @@ def get_aggregated_impressions(
     """Computes aggregated impressions values in the data across all channels.
 
     Args:
+      new_data: An optional `DataTensors` object containing the new media,
+        reach, frequency, organic media, organic reach, organic frequency,
+        and non-media treatments tensors. If `new_data` argument is used, then
+        the aggregated impressions are computed using the values of the tensors
+        passed in the `new_data` argument and the original values of all the
+        remaining tensors. The new tensors' dimensions must match the dimensions
+        of the corresponding original tensors from `meridian.input_data`. If
+        `None`, the existing tensors from the Meridian object are used.
       selected_geos: Optional list containing a subset of geos to include. By
         default, all geos are included.
       selected_times: Optional list containing a subset of times to include. By
@@ -3088,50 +3130,53 @@ def get_aggregated_impressions(
       (or `(n_channels,)` if geos and times are aggregated) with aggregate
       impression values per channel.
     """
+    tensor_names_list = [
+        constants.MEDIA,
+        constants.REACH,
+        constants.FREQUENCY,
+    ]
+    if include_non_paid_channels:
+      tensor_names_list.extend([
+          constants.ORGANIC_MEDIA,
+          constants.ORGANIC_REACH,
+          constants.ORGANIC_FREQUENCY,
+          constants.NON_MEDIA_TREATMENTS,
+      ])
+    data_tensors = self._fill_missing_data_tensors(new_data, tensor_names_list)
     impressions_list = []
     if self._meridian.n_media_channels > 0:
       impressions_list.append(
-          self._meridian.media_tensors.media[:, -self._meridian.n_times :, :]
+          data_tensors.media[:, -self._meridian.n_times :, :]
       )
 
     if self._meridian.n_rf_channels > 0:
       if optimal_frequency is None:
-        new_frequency = self._meridian.rf_tensors.frequency
+        new_frequency = data_tensors.frequency
       else:
-        new_frequency = (
-            tf.ones_like(self._meridian.rf_tensors.frequency)
-            * optimal_frequency
-        )
+        new_frequency = tf.ones_like(data_tensors.frequency) * optimal_frequency
       impressions_list.append(
-          self._meridian.rf_tensors.reach[:, -self._meridian.n_times :, :]
+          data_tensors.reach[:, -self._meridian.n_times :, :]
           * new_frequency[:, -self._meridian.n_times :, :]
       )
 
     if include_non_paid_channels:
       if self._meridian.n_organic_media_channels > 0:
         impressions_list.append(
-            self._meridian.organic_media_tensors.organic_media[
-                :, -self._meridian.n_times :, :
-            ]
+            data_tensors.organic_media[:, -self._meridian.n_times :, :]
         )
       if self._meridian.n_organic_rf_channels > 0:
         if optimal_frequency is None:
-          new_organic_frequency = (
-              self._meridian.organic_rf_tensors.organic_frequency
-          )
+          new_organic_frequency = data_tensors.organic_frequency
         else:
           new_organic_frequency = (
-              tf.ones_like(self._meridian.organic_rf_tensors.organic_frequency)
-              * optimal_frequency
+              tf.ones_like(data_tensors.organic_frequency) * optimal_frequency
           )
         impressions_list.append(
-            self._meridian.organic_rf_tensors.organic_reach[
-                :, -self._meridian.n_times :, :
-            ]
+            data_tensors.organic_reach[:, -self._meridian.n_times :, :]
             * new_organic_frequency[:, -self._meridian.n_times :, :]
         )
       if self._meridian.n_non_media_channels > 0:
-        impressions_list.append(self._meridian.non_media_treatments)
+        impressions_list.append(data_tensors.non_media_treatments)
 
     return self.filter_and_aggregate_geos_and_times(
         tensor=tf.concat(impressions_list, axis=-1),
@@ -3293,7 +3338,9 @@ def _counterfactual_metric_dataset(
         generated. If `False`, prior counterfactual metrics are generated.
       new_data: Optional DataTensors. When specified, it contains the
         counterfactual media, reach, frequency, media_spend, and rf_spend
-        values. Default uses the tensors from `meridian.input_data`.
+        values. The new tensors' dimensions must match the dimensions of the
+        corresponding original tensors from `meridian.input_data`. Default uses
+        the tensors from `meridian.input_data`.
       marginal_roi_by_reach: Boolean. Marginal ROI (mROI) is defined as the
         return on the next dollar spent. If this argument is `True`, the
         assumption is that the next dollar spent only impacts reach, holding
@@ -4554,19 +4601,25 @@ def _compute_marginal_roi_aggregate(
       xr_dims: Sequence[str],
       xr_coords: Mapping[str, tuple[Sequence[str], Sequence[str]]],
       spend_with_total: tf.Tensor,
+      new_data: DataTensors | None = None,
       use_kpi: bool = False,
       confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
       **roi_kwargs,
   ) -> xr.Dataset:
+    data_tensors = self._fill_missing_data_tensors(
+        new_data, [constants.MEDIA, constants.REACH, constants.FREQUENCY]
+    )
     mroi_prior = self.marginal_roi(
         use_posterior=False,
+        new_data=data_tensors,
         by_reach=marginal_roi_by_reach,
         incremental_increase=marginal_roi_incremental_increase,
         use_kpi=use_kpi,
         **roi_kwargs,
     )
     mroi_posterior = self.marginal_roi(
         use_posterior=True,
+        new_data=data_tensors,
         by_reach=marginal_roi_by_reach,
         incremental_increase=marginal_roi_incremental_increase,
         use_kpi=use_kpi,
@@ -4575,13 +4628,13 @@ def _compute_marginal_roi_aggregate(
     # TODO: Organize the arguments passed between the functions
     # using DataTensors.
     incremented_tensors = _scale_tensors_by_multiplier(
-        media=self._meridian.media_tensors.media,
-        reach=self._meridian.rf_tensors.reach,
-        frequency=self._meridian.rf_tensors.frequency,
+        media=data_tensors.media,
+        reach=data_tensors.reach,
+        frequency=data_tensors.frequency,
         multiplier=(1 + marginal_roi_incremental_increase),
         by_reach=marginal_roi_by_reach,
     )
-    new_data = DataTensors(
+    incremented_data = DataTensors(
         media=(
             incremented_tensors["new_media"]
             if "new_media" in incremented_tensors
@@ -4602,7 +4655,7 @@ def _compute_marginal_roi_aggregate(
     mroi_prior_total = (
         self.expected_outcome(
             use_posterior=False,
-            new_data=new_data,
+            new_data=incremented_data,
             use_kpi=use_kpi,
             **roi_kwargs,
         )
@@ -4611,7 +4664,7 @@ def _compute_marginal_roi_aggregate(
     mroi_posterior_total = (
         self.expected_outcome(
             use_posterior=True,
-            new_data=new_data,
+            new_data=incremented_data,
             use_kpi=use_kpi,
             **roi_kwargs,
         )