Skip to content

Commit

Permalink
Add new_data argument to summary_metrics.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 715248061
  • Loading branch information
lukmaz authored and The Meridian Authors committed Jan 21, 2025
1 parent 7757c4b commit 64585da
Show file tree
Hide file tree
Showing 5 changed files with 626 additions and 168 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ To release a new version (e.g. from `1.0.0` -> `2.0.0`):
-->

## [Unreleased]

## [0.17.0] - 2025-01-16
* Add `new_data` argument to `Analyzer.summary_metrics` method.
* Define constants for channel constraints in the optimizer.

## [0.16.0] - 2025-01-08
Expand Down Expand Up @@ -148,6 +151,7 @@ To release a new version (e.g. from `1.0.0` -> `2.0.0`):
[0.14.0]: https://github.com/google/meridian/releases/tag/v0.14.0
[0.15.0]: https://github.com/google/meridian/releases/tag/v0.15.0
[0.16.0]: https://github.com/google/meridian/releases/tag/v0.16.0
[Unreleased]: https://github.com/google/meridian/compare/v0.16.0...HEAD
[0.17.0]: https://github.com/google/meridian/releases/tag/v0.17.0
[Unreleased]: https://github.com/google/meridian/compare/v0.17.0...HEAD


2 changes: 1 addition & 1 deletion meridian/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

"""Meridian API."""

__version__ = "0.16.0"
__version__ = "0.17.0"


from meridian import analysis
Expand Down
131 changes: 92 additions & 39 deletions meridian/analysis/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -739,7 +739,9 @@ def _get_scaled_data_tensors(
`revenue_per_kpi`. If `None`, the original scaled tensors from the
Meridian object are used. If `new_data` is provided, the output contains
the scaled versions of the tensors in `new_data` and the original scaled
versions of all the remaining tensors.
versions of all the remaining tensors. The new tensors' dimensions must
match the dimensions of the corresponding original tensors from
`meridian.input_data`.
include_non_paid_channels: Boolean. If `True`, organic media, organic RF
and non-media treatments data is included in the output.
Expand Down Expand Up @@ -1146,7 +1148,9 @@ def expected_outcome(
frequency=new_frequency))` calculates expected outcome conditional on
the original `media`, `organic_media`, `organic_reach`,
`organic_frequency`, `non_media_treatments` and `controls` tensors and
on the new given values for `reach` and `frequency` tensors.
on the new given values for `reach` and `frequency` tensors. The new
tensors' dimensions must match the dimensions of the corresponding
original tensors from `input_data`.
selected_geos: Optional list containing a subset of geos to include. By
default, all geos are included.
selected_times: Optional list containing a subset of dates to include.
Expand Down Expand Up @@ -2182,9 +2186,12 @@ def marginal_roi(
use_posterior: If `True` then the posterior distribution is calculated.
Otherwise, the prior distribution is calculated.
new_data: Optional. DataTensors containing `media`, `media_spend`,
`reach`, `frequency`, and `rf_spend` data with the same shape as
`meridian.input_data`. Used to compute mROI for alternative data.
Default uses the tensors from `meridian.input_data`.
`reach`, `frequency`, and `rf_spend` data. If provided, the marginal ROI
is calculated using the values of the tensors passed in `new_data` and
the original values of all the remaining tensors. The new
tensors' dimensions must match the dimensions of the corresponding
original tensors from `meridian.input_data`. If `None`, the marginal ROI
is calculated using the original values of all the tensors.
selected_geos: Optional. Contains a subset of geos to include. By default,
all geos are included.
selected_times: Optional. Contains a subset of times to include. By
Expand Down Expand Up @@ -2342,9 +2349,12 @@ def roi(
use_posterior: Boolean. If `True`, then the posterior distribution is
calculated. Otherwise, the prior distribution is calculated.
new_data: Optional. DataTensors containing `media`, `media_spend`,
`reach`, `frequency`, and `rf_spend` data with the same shape as
`meridian.input_data`. Used to compute ROI for alternative data. Default
uses the tensors from `meridian.input_data`.
`reach`, `frequency`, and `rf_spend` data. If provided, the ROI is
calculated using the values of the tensors passed in `new_data` and the
original values of all the remaining tensors. The new tensors'
dimensions must match the dimensions of the corresponding original
tensors from `meridian.input_data`. If `None`, the ROI is calculated
using the original values of all the tensors.
selected_geos: Optional list containing a subset of geos to include. By
default, all geos are included.
selected_times: Optional list containing a subset of times to include. By
Expand Down Expand Up @@ -2453,9 +2463,12 @@ def cpik(
use_posterior: Boolean. If `True` then the posterior distribution is
calculated. Otherwise, the prior distribution is calculated.
new_data: Optional. DataTensors containing `media`, `media_spend`,
`reach`, `frequency`, and `rf_spend` data with the same shape as
`meridian.input_data`. Used to compute CPIK for alternative data.
Default uses the tensors from `meridian.input_data`.
`reach`, `frequency`, and `rf_spend` data. If provided, the CPIK is
calculated using the values of the tensors passed in `new_data` and the
original values of all the remaining tensors. The new tensors'
dimensions must match the dimensions of the corresponding original
tensors from `meridian.input_data`. If `None`, the CPIK is calculated
using the original values of all the tensors.
selected_geos: Optional list containing a subset of geos to include. By
default, all geos are included.
selected_times: Optional list containing a subset of times to include. By
Expand Down Expand Up @@ -2733,6 +2746,7 @@ def _calculate_baseline_expected_outcome(
def _compute_incremental_outcome_aggregate(
self,
use_posterior: bool,
new_data: DataTensors | None = None,
use_kpi: bool | None = None,
include_non_paid_channels: bool = True,
non_media_baseline_values: Sequence[str | float] | None = None,
Expand All @@ -2742,6 +2756,7 @@ def _compute_incremental_outcome_aggregate(
use_kpi = use_kpi or self._meridian.input_data.revenue_per_kpi is None
incremental_outcome_m = self.incremental_outcome(
use_posterior=use_posterior,
new_data=new_data,
use_kpi=use_kpi,
include_non_paid_channels=include_non_paid_channels,
non_media_baseline_values=non_media_baseline_values,
Expand All @@ -2758,6 +2773,7 @@ def _compute_incremental_outcome_aggregate(

def summary_metrics(
self,
new_data: DataTensors | None = None,
marginal_roi_by_reach: bool = True,
marginal_roi_incremental_increase: float = 0.01,
selected_geos: Sequence[str] | None = None,
Expand All @@ -2776,6 +2792,12 @@ def summary_metrics(
for the aggregate `"All Paid Channels"` channel dimension.
Args:
new_data: Optional `DataTensors` object. If provided, the summary metrics
are calculated using the values of the tensors passed in `new_data` and
the original values of all the remaining tensors. The new tensors'
dimensions must match the dimensions of the corresponding original
tensors from `meridian.input_data`. If `None`, the summary metrics are
calculated using the original values of all the tensors.
marginal_roi_by_reach: Boolean. Marginal ROI (mROI) is defined as the
return on the next dollar spent. If this argument is `True`, the
assumption is that the next dollar spent only impacts reach, holding
Expand Down Expand Up @@ -2830,6 +2852,7 @@ def summary_metrics(
}
batched_kwargs = {"batch_size": batch_size, **dim_kwargs}
aggregated_impressions = self.get_aggregated_impressions(
new_data=new_data,
optimal_frequency=optimal_frequency,
include_non_paid_channels=include_non_paid_channels,
**dim_kwargs,
Expand All @@ -2844,23 +2867,27 @@ def summary_metrics(

incremental_outcome_prior = self._compute_incremental_outcome_aggregate(
use_posterior=False,
new_data=new_data,
use_kpi=use_kpi,
include_non_paid_channels=include_non_paid_channels,
**batched_kwargs,
)
incremental_outcome_posterior = self._compute_incremental_outcome_aggregate(
use_posterior=True,
new_data=new_data,
use_kpi=use_kpi,
include_non_paid_channels=include_non_paid_channels,
**batched_kwargs,
)
expected_outcome_prior = self.expected_outcome(
use_posterior=False,
new_data=new_data,
use_kpi=use_kpi,
**batched_kwargs,
)
expected_outcome_posterior = self.expected_outcome(
use_posterior=True,
new_data=new_data,
use_kpi=use_kpi,
**batched_kwargs,
)
Expand Down Expand Up @@ -2969,10 +2996,13 @@ def summary_metrics(
# If non-paid channels are not included, return the all, paid and non-paid
# metrics.
spend_list = []
new_spend_tensors = self._fill_missing_data_tensors(
new_data, [constants.MEDIA_SPEND, constants.RF_SPEND]
)
if self._meridian.n_media_channels > 0:
spend_list.append(self._meridian.media_tensors.media_spend)
spend_list.append(new_spend_tensors.media_spend)
if self._meridian.n_rf_channels > 0:
spend_list.append(self._meridian.rf_tensors.rf_spend)
spend_list.append(new_spend_tensors.rf_spend)
# TODO Add support for 1-dimensional spend.
aggregated_spend = self.filter_and_aggregate_geos_and_times(
tensor=tf.concat(spend_list, axis=-1), **dim_kwargs
Expand Down Expand Up @@ -3005,6 +3035,7 @@ def summary_metrics(
xr_coords=xr_coords_with_ci_and_distribution,
confidence_level=confidence_level,
spend_with_total=spend_with_total,
new_data=new_data,
use_kpi=use_kpi,
**batched_kwargs,
# Drop mROI metric values in the Dataset's data_vars for the
Expand All @@ -3017,12 +3048,14 @@ def summary_metrics(
cpik = self._compute_cpik_aggregate(
incremental_kpi_prior=self._compute_incremental_outcome_aggregate(
use_posterior=False,
new_data=new_data,
use_kpi=True,
include_non_paid_channels=False,
**batched_kwargs,
),
incremental_kpi_posterior=self._compute_incremental_outcome_aggregate(
use_posterior=True,
new_data=new_data,
use_kpi=True,
include_non_paid_channels=False,
**batched_kwargs,
Expand Down Expand Up @@ -3058,6 +3091,7 @@ def summary_metrics(

def get_aggregated_impressions(
self,
new_data: DataTensors | None = None,
selected_geos: Sequence[str] | None = None,
selected_times: Sequence[str] | None = None,
aggregate_geos: bool = True,
Expand All @@ -3068,6 +3102,14 @@ def get_aggregated_impressions(
"""Computes aggregated impressions values in the data across all channels.
Args:
new_data: An optional `DataTensors` object containing the new media,
reach, frequency, organic media, organic reach, organic frequency, and
non-media treatments tensors. If the `new_data` argument is used, then
the aggregated impressions are computed using the values of the tensors
passed in the `new_data` argument and the original values of all the
remaining tensors. The new tensors' dimensions must match the dimensions
of the corresponding original tensors from `meridian.input_data`. If
`None`, the existing tensors from the Meridian object are used.
selected_geos: Optional list containing a subset of geos to include. By
default, all geos are included.
selected_times: Optional list containing a subset of times to include. By
Expand All @@ -3088,50 +3130,53 @@ def get_aggregated_impressions(
(or `(n_channels,)` if geos and times are aggregated) with aggregate
impression values per channel.
"""
tensor_names_list = [
constants.MEDIA,
constants.REACH,
constants.FREQUENCY,
]
if include_non_paid_channels:
tensor_names_list.extend([
constants.ORGANIC_MEDIA,
constants.ORGANIC_REACH,
constants.ORGANIC_FREQUENCY,
constants.NON_MEDIA_TREATMENTS,
])
data_tensors = self._fill_missing_data_tensors(new_data, tensor_names_list)
impressions_list = []
if self._meridian.n_media_channels > 0:
impressions_list.append(
self._meridian.media_tensors.media[:, -self._meridian.n_times :, :]
data_tensors.media[:, -self._meridian.n_times :, :]
)

if self._meridian.n_rf_channels > 0:
if optimal_frequency is None:
new_frequency = self._meridian.rf_tensors.frequency
new_frequency = data_tensors.frequency
else:
new_frequency = (
tf.ones_like(self._meridian.rf_tensors.frequency)
* optimal_frequency
)
new_frequency = tf.ones_like(data_tensors.frequency) * optimal_frequency
impressions_list.append(
self._meridian.rf_tensors.reach[:, -self._meridian.n_times :, :]
data_tensors.reach[:, -self._meridian.n_times :, :]
* new_frequency[:, -self._meridian.n_times :, :]
)

if include_non_paid_channels:
if self._meridian.n_organic_media_channels > 0:
impressions_list.append(
self._meridian.organic_media_tensors.organic_media[
:, -self._meridian.n_times :, :
]
data_tensors.organic_media[:, -self._meridian.n_times :, :]
)
if self._meridian.n_organic_rf_channels > 0:
if optimal_frequency is None:
new_organic_frequency = (
self._meridian.organic_rf_tensors.organic_frequency
)
new_organic_frequency = data_tensors.organic_frequency
else:
new_organic_frequency = (
tf.ones_like(self._meridian.organic_rf_tensors.organic_frequency)
* optimal_frequency
tf.ones_like(data_tensors.organic_frequency) * optimal_frequency
)
impressions_list.append(
self._meridian.organic_rf_tensors.organic_reach[
:, -self._meridian.n_times :, :
]
data_tensors.organic_reach[:, -self._meridian.n_times :, :]
* new_organic_frequency[:, -self._meridian.n_times :, :]
)
if self._meridian.n_non_media_channels > 0:
impressions_list.append(self._meridian.non_media_treatments)
impressions_list.append(data_tensors.non_media_treatments)

return self.filter_and_aggregate_geos_and_times(
tensor=tf.concat(impressions_list, axis=-1),
Expand Down Expand Up @@ -3293,7 +3338,9 @@ def _counterfactual_metric_dataset(
generated. If `False`, prior counterfactual metrics are generated.
new_data: Optional DataTensors. When specified, it contains the
counterfactual media, reach, frequency, media_spend, and rf_spend
values. Default uses the tensors from `meridian.input_data`.
values. The new tensors' dimensions must match the dimensions of the
corresponding original tensors from `meridian.input_data`. Default uses
the tensors from `meridian.input_data`.
marginal_roi_by_reach: Boolean. Marginal ROI (mROI) is defined as the
return on the next dollar spent. If this argument is `True`, the
assumption is that the next dollar spent only impacts reach, holding
Expand Down Expand Up @@ -4554,19 +4601,25 @@ def _compute_marginal_roi_aggregate(
xr_dims: Sequence[str],
xr_coords: Mapping[str, tuple[Sequence[str], Sequence[str]]],
spend_with_total: tf.Tensor,
new_data: DataTensors | None = None,
use_kpi: bool = False,
confidence_level: float = constants.DEFAULT_CONFIDENCE_LEVEL,
**roi_kwargs,
) -> xr.Dataset:
data_tensors = self._fill_missing_data_tensors(
new_data, [constants.MEDIA, constants.REACH, constants.FREQUENCY]
)
mroi_prior = self.marginal_roi(
use_posterior=False,
new_data=data_tensors,
by_reach=marginal_roi_by_reach,
incremental_increase=marginal_roi_incremental_increase,
use_kpi=use_kpi,
**roi_kwargs,
)
mroi_posterior = self.marginal_roi(
use_posterior=True,
new_data=data_tensors,
by_reach=marginal_roi_by_reach,
incremental_increase=marginal_roi_incremental_increase,
use_kpi=use_kpi,
Expand All @@ -4575,13 +4628,13 @@ def _compute_marginal_roi_aggregate(
# TODO: Organize the arguments passed between the functions
# using DataTensors.
incremented_tensors = _scale_tensors_by_multiplier(
media=self._meridian.media_tensors.media,
reach=self._meridian.rf_tensors.reach,
frequency=self._meridian.rf_tensors.frequency,
media=data_tensors.media,
reach=data_tensors.reach,
frequency=data_tensors.frequency,
multiplier=(1 + marginal_roi_incremental_increase),
by_reach=marginal_roi_by_reach,
)
new_data = DataTensors(
incremented_data = DataTensors(
media=(
incremented_tensors["new_media"]
if "new_media" in incremented_tensors
Expand All @@ -4602,7 +4655,7 @@ def _compute_marginal_roi_aggregate(
mroi_prior_total = (
self.expected_outcome(
use_posterior=False,
new_data=new_data,
new_data=incremented_data,
use_kpi=use_kpi,
**roi_kwargs,
)
Expand All @@ -4611,7 +4664,7 @@ def _compute_marginal_roi_aggregate(
mroi_posterior_total = (
self.expected_outcome(
use_posterior=True,
new_data=new_data,
new_data=incremented_data,
use_kpi=use_kpi,
**roi_kwargs,
)
Expand Down
Loading

0 comments on commit 64585da

Please sign in to comment.