From b0b0fd1582a7d7ac19027025a910e85857bd2991 Mon Sep 17 00:00:00 2001 From: ekauffma Date: Wed, 31 May 2023 09:44:43 +0200 Subject: [PATCH 01/18] changed awkward operations to numpy in yield_stdev --- src/cabinetry/model_utils.py | 91 ++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 45 deletions(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index f336e60b..3f0eae74 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -4,13 +4,6 @@ import json import logging from typing import Any, DefaultDict, Dict, List, NamedTuple, Optional, Tuple, Union - -try: - # use awkward v2 - import awkward._v2 as ak -except ModuleNotFoundError: # pragma: no cover - # fallback if the _v2 submodule disappears after the full v2 release - import awkward as ak # pragma: no cover import numpy as np import pyhf @@ -300,16 +293,21 @@ def yield_stdev( up_comb = np.vstack((up_comb, np.sum(up_comb, axis=0))) # turn into list of channels (keep all samples, select correct bins per channel) # indices: channel, sample, bin - up_yields = [ + up_yields_per_channel = [ up_comb[:, model.config.channel_slices[ch]] for ch in model.config.channels ] - # append list of yields summed per channel - up_yields += [ - np.asarray(np.sum(chan_yields, axis=-1, keepdims=True)) - for chan_yields in up_yields - ] - # indices: variation, channel, sample, bin - up_variations.append(up_yields) + # calculate list of yields summed per channel + up_yields_sum = np.array([ + np.sum(chan_yields, axis=-1, keepdims=True).tolist() + for chan_yields in up_yields_per_channel + ]) + # reshape to get rid of extra axis + up_yields_sum = up_yields_sum.reshape(up_yields_sum.shape[:-1]).T + + # concatenate per-channel sum to up_comb + up_yields = np.concatenate((up_comb, up_yields_sum), axis=1) + # indices: variation, sample, bin + up_variations.append(up_yields.tolist()) # model distribution per sample with this parameter varied down down_comb = pyhf.tensorlib.to_numpy( @@ -318,30 +316,33 @@ def yield_stdev( # add total prediction (sum over samples) down_comb = np.vstack((down_comb, np.sum(down_comb, axis=0))) # turn into list of channels - down_yields = [ - down_comb[:, model.config.channel_slices[ch]] - for ch in model.config.channels - ] - # append list of yields summed per channel - down_yields += [ - np.asarray(np.sum(chan_yields, axis=-1, keepdims=True)) - for chan_yields in down_yields + down_yields_per_channel = [ + down_comb[:, model.config.channel_slices[ch]] for ch in model.config.channels ] + + # calculate list of yields summed per channel + down_yields_sum = np.array([ + np.sum(chan_yields, axis=-1, keepdims=True).tolist() + for chan_yields in down_yields_per_channel + ]) + down_yields_sum = down_yields_sum.reshape(down_yields_sum.shape[:-1]).T + + down_yields = np.concatenate((down_comb, down_yields_sum), axis=1) down_variations.append(down_yields) - # convert to awkward arrays for further processing - up_variations_ak = ak.from_iter(up_variations) - down_variations_ak = ak.from_iter(down_variations) + # convert to numpy arrays for further processing + up_variations_np = np.array(up_variations) + down_variations_np = np.array(down_variations) # calculate symmetric uncertainties for all components # indices: variation, channel (last entries sums), sample (last entry sum), bin - sym_uncs = (up_variations_ak - down_variations_ak) / 2 + sym_uncs = (up_variations_np - down_variations_np) / 2 - # calculate total variance, indexed by channel, sample, bin (per-channel numbers act + # calculate total variance, indexed by sample, bin (per-channel numbers act # like additional channels with one bin each) if np.count_nonzero(corr_mat - np.diagflat(np.ones_like(parameters))) == 0: # no off-diagonal contributions from correlation matrix (e.g. pre-fit) - total_variance = ak.sum(np.power(sym_uncs, 2), axis=0) + total_variance = np.sum(np.power(sym_uncs, 2), axis=0) else: # full calculation including off-diagonal contributions # with v as vector of variations (each element contains yields under variation) @@ -349,7 +350,7 @@ def yield_stdev( # variance = sum_i sum_j v[i] * M[i, j] * v[j] # where the product between elements of v again is elementwise (multiplying bin # yields), and the final variance shape is the same as element of v (yield - # uncertainties per bin, sample, channel) + # uncertainties per bin, sample) # possible optimizations that could be considered here: # - skipping staterror-staterror terms for per-bin calculation (orthogonal) @@ -357,29 +358,29 @@ def yield_stdev( # - (optional) skipping combinations with correlations below threshold # calculate M[i, j] * v[j] first - # indices: pars (i), pars (j), channel, sample, bin + # indices: pars (i), pars (j), sample, bin m_times_v = ( - corr_mat[..., np.newaxis, np.newaxis, np.newaxis] + corr_mat[..., np.newaxis, np.newaxis] * sym_uncs[np.newaxis, ...] ) - # now multiply by v[i] as well, indices: pars(i), pars(j), channel, sample, bin + # now multiply by v[i] as well, indices: pars(i), pars(j), sample, bin v_times_m_times_v = sym_uncs[:, np.newaxis, ...] * m_times_v - # finally perform sums over i and j, remaining indices: channel, sample, bin - total_variance = ak.sum(ak.sum(v_times_m_times_v, axis=1), axis=0) + # finally perform sums over i and j, remaining indices: sample, bin + total_variance = np.sum(np.sum(v_times_m_times_v, axis=1), axis=0) # convert to standard deviations per bin and per channel - n_channels = len(model.config.channels) - # indices: (channel, sample, bin) - total_stdev_per_bin = np.sqrt(total_variance[:n_channels]) + # calculate separation between per-bin indices and per-channel indices + last_bin_index = model.config.channel_slices[model.config.channels[-1]].stop # indices: (channel, sample) - total_stdev_per_channel = ak.sum(np.sqrt(total_variance[n_channels:]), axis=-1) + total_stdev_per_channel = np.sqrt(total_variance[:,last_bin_index:].T).tolist() + # indices: (channel, sample, bin) + total_stdev_per_bin = [ + np.sqrt(total_variance[:,:last_bin_index][:, model.config.channel_slices[ch]]).tolist() + for ch in model.config.channels + ] # log total stdev per bin / channel (-1 index for sample sum) - log.debug(f"total stdev is {total_stdev_per_bin[:, -1, :]}") - log.debug(f"total stdev per channel is {total_stdev_per_channel[:, -1]}") - - # convert to lists - total_stdev_per_bin = ak.to_list(total_stdev_per_bin) - total_stdev_per_channel = ak.to_list(total_stdev_per_channel) + log.debug(f"total stdev is {[total_stdev_per_bin[i][-1] for i in range(len(total_stdev_per_bin))]}") + log.debug(f"total stdev per channel is {[total_stdev_per_channel[i][-1] for i in range(len(total_stdev_per_channel))]}") # save to cache _YIELD_STDEV_CACHE.update( From 1e1d5a01864b5058380f63e9239d7a798f56268c Mon Sep 17 00:00:00 2001 From: ekauffma Date: Wed, 31 May 2023 12:14:34 +0200 Subject: [PATCH 02/18] run black on model_utils --- src/cabinetry/model_utils.py | 48 +++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 3f0eae74..bf7c20e3 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -297,13 +297,15 @@ def yield_stdev( up_comb[:, model.config.channel_slices[ch]] for ch in model.config.channels ] # calculate list of yields summed per channel - up_yields_sum = np.array([ - np.sum(chan_yields, axis=-1, keepdims=True).tolist() - for chan_yields in up_yields_per_channel - ]) + up_yields_sum = np.array( + [ + np.sum(chan_yields, axis=-1, keepdims=True).tolist() + for chan_yields in up_yields_per_channel + ] + ) # reshape to get rid of extra axis up_yields_sum = up_yields_sum.reshape(up_yields_sum.shape[:-1]).T - + # concatenate per-channel sum to up_comb up_yields = np.concatenate((up_comb, up_yields_sum), axis=1) # indices: variation, sample, bin @@ -317,16 +319,19 @@ def yield_stdev( down_comb = np.vstack((down_comb, np.sum(down_comb, axis=0))) # turn into list of channels down_yields_per_channel = [ - down_comb[:, model.config.channel_slices[ch]] for ch in model.config.channels + down_comb[:, model.config.channel_slices[ch]] + for ch in model.config.channels ] - + # calculate list of yields summed per channel - down_yields_sum = np.array([ - np.sum(chan_yields, axis=-1, keepdims=True).tolist() - for chan_yields in down_yields_per_channel - ]) + down_yields_sum = np.array( + [ + np.sum(chan_yields, axis=-1, keepdims=True).tolist() + for chan_yields in down_yields_per_channel + ] + ) down_yields_sum = down_yields_sum.reshape(down_yields_sum.shape[:-1]).T - + down_yields = np.concatenate((down_comb, down_yields_sum), axis=1) down_variations.append(down_yields) @@ -359,10 +364,7 @@ def yield_stdev( # calculate M[i, j] * v[j] first # indices: pars (i), pars (j), sample, bin - m_times_v = ( - corr_mat[..., np.newaxis, np.newaxis] - * sym_uncs[np.newaxis, ...] - ) + m_times_v = corr_mat[..., np.newaxis, np.newaxis] * sym_uncs[np.newaxis, ...] # now multiply by v[i] as well, indices: pars(i), pars(j), sample, bin v_times_m_times_v = sym_uncs[:, np.newaxis, ...] * m_times_v # finally perform sums over i and j, remaining indices: sample, bin @@ -372,15 +374,21 @@ def yield_stdev( # calculate separation between per-bin indices and per-channel indices last_bin_index = model.config.channel_slices[model.config.channels[-1]].stop # indices: (channel, sample) - total_stdev_per_channel = np.sqrt(total_variance[:,last_bin_index:].T).tolist() + total_stdev_per_channel = np.sqrt(total_variance[:, last_bin_index:].T).tolist() # indices: (channel, sample, bin) total_stdev_per_bin = [ - np.sqrt(total_variance[:,:last_bin_index][:, model.config.channel_slices[ch]]).tolist() + np.sqrt( + total_variance[:, :last_bin_index][:, model.config.channel_slices[ch]] + ).tolist() for ch in model.config.channels ] # log total stdev per bin / channel (-1 index for sample sum) - log.debug(f"total stdev is {[total_stdev_per_bin[i][-1] for i in range(len(total_stdev_per_bin))]}") - log.debug(f"total stdev per channel is {[total_stdev_per_channel[i][-1] for i in range(len(total_stdev_per_channel))]}") + log.debug( + f"total stdev is {[total_stdev_per_bin[i][-1] for i in range(len(total_stdev_per_bin))]}" + ) + log.debug( + f"total stdev per channel is {[total_stdev_per_channel[i][-1] for i in range(len(total_stdev_per_channel))]}" + ) # save to cache _YIELD_STDEV_CACHE.update( From b94094e1b5633ce860b835fc20b6e5439a3c5bad Mon Sep 17 00:00:00 2001 From: ekauffma Date: Wed, 31 May 2023 14:10:31 +0200 Subject: [PATCH 03/18] added import spacing and reduces line lengths for flake8 --- src/cabinetry/model_utils.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index bf7c20e3..6e1a0a5e 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -4,6 +4,7 @@ import json import logging from typing import Any, DefaultDict, Dict, List, NamedTuple, Optional, Tuple, Union + import numpy as np import pyhf @@ -383,12 +384,14 @@ def yield_stdev( for ch in model.config.channels ] # log total stdev per bin / channel (-1 index for sample sum) - log.debug( - f"total stdev is {[total_stdev_per_bin[i][-1] for i in range(len(total_stdev_per_bin))]}" - ) - log.debug( - f"total stdev per channel is {[total_stdev_per_channel[i][-1] for i in range(len(total_stdev_per_channel))]}" - ) + total_stdev_bin = [ + total_stdev_per_bin[i][-1] for i in range(len(total_stdev_per_bin)) + ] + log.debug(f"total stdev is {total_stdev_bin}") + total_stdev_chan = [ + total_stdev_per_channel[i][-1] for i in range(len(total_stdev_per_channel)) + ] + log.debug(f"total stdev per channel is {total_stdev_chan}") # save to cache _YIELD_STDEV_CACHE.update( From 8a58030a1d4d5dea46fe7e5dde623ff152538fa8 Mon Sep 17 00:00:00 2001 From: ekauffma Date: Wed, 31 May 2023 15:11:56 +0200 Subject: [PATCH 04/18] get rid of unnecessary tolist --- src/cabinetry/model_utils.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 6e1a0a5e..30ac49a0 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -300,7 +300,7 @@ def yield_stdev( # calculate list of yields summed per channel up_yields_sum = np.array( [ - np.sum(chan_yields, axis=-1, keepdims=True).tolist() + np.sum(chan_yields, axis=-1, keepdims=True) for chan_yields in up_yields_per_channel ] ) @@ -327,7 +327,7 @@ def yield_stdev( # calculate list of yields summed per channel down_yields_sum = np.array( [ - np.sum(chan_yields, axis=-1, keepdims=True).tolist() + np.sum(chan_yields, axis=-1, keepdims=True) for chan_yields in down_yields_per_channel ] ) @@ -384,14 +384,14 @@ def yield_stdev( for ch in model.config.channels ] # log total stdev per bin / channel (-1 index for sample sum) - total_stdev_bin = [ - total_stdev_per_bin[i][-1] for i in range(len(total_stdev_per_bin)) - ] - log.debug(f"total stdev is {total_stdev_bin}") - total_stdev_chan = [ - total_stdev_per_channel[i][-1] for i in range(len(total_stdev_per_channel)) - ] - log.debug(f"total stdev per channel is {total_stdev_chan}") + total_stdev_bin = [total_stdev_per_bin[i][-1] for i in range(len(total_stdev_per_bin))] + log.debug( + f"total stdev is {total_stdev_bin}" + ) + total_stdev_chan = [total_stdev_per_channel[i][-1] for i in range(len(total_stdev_per_channel))] + log.debug( + f"total stdev per channel is {total_stdev_chan}" + ) # save to cache _YIELD_STDEV_CACHE.update( From c6bef1d74a189c32a28c12770903215266266564 Mon Sep 17 00:00:00 2001 From: ekauffma Date: Wed, 31 May 2023 15:17:20 +0200 Subject: [PATCH 05/18] Revert "get rid of unnecessary tolist" This reverts commit 8a58030a1d4d5dea46fe7e5dde623ff152538fa8. --- src/cabinetry/model_utils.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 30ac49a0..6e1a0a5e 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -300,7 +300,7 @@ def yield_stdev( # calculate list of yields summed per channel up_yields_sum = np.array( [ - np.sum(chan_yields, axis=-1, keepdims=True) + np.sum(chan_yields, axis=-1, keepdims=True).tolist() for chan_yields in up_yields_per_channel ] ) @@ -327,7 +327,7 @@ def yield_stdev( # calculate list of yields summed per channel down_yields_sum = np.array( [ - np.sum(chan_yields, axis=-1, keepdims=True) + np.sum(chan_yields, axis=-1, keepdims=True).tolist() for chan_yields in down_yields_per_channel ] ) @@ -384,14 +384,14 @@ def yield_stdev( for ch in model.config.channels ] # log total stdev per bin / channel (-1 index for sample sum) - total_stdev_bin = [total_stdev_per_bin[i][-1] for i in range(len(total_stdev_per_bin))] - log.debug( - f"total stdev is {total_stdev_bin}" - ) - total_stdev_chan = [total_stdev_per_channel[i][-1] for i in range(len(total_stdev_per_channel))] - log.debug( - f"total stdev per channel is {total_stdev_chan}" - ) + total_stdev_bin = [ + total_stdev_per_bin[i][-1] for i in range(len(total_stdev_per_bin)) + ] + log.debug(f"total stdev is {total_stdev_bin}") + total_stdev_chan = [ + total_stdev_per_channel[i][-1] for i in range(len(total_stdev_per_channel)) + ] + log.debug(f"total stdev per channel is {total_stdev_chan}") # save to cache _YIELD_STDEV_CACHE.update( From 4fb9614e4f8e7648870c824a15744f28af186813 Mon Sep 17 00:00:00 2001 From: ekauffma Date: Wed, 31 May 2023 15:19:35 +0200 Subject: [PATCH 06/18] change to stack and get rid of unnecessary tolist in yield sum --- src/cabinetry/model_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 6e1a0a5e..0e726641 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -298,9 +298,9 @@ def yield_stdev( up_comb[:, model.config.channel_slices[ch]] for ch in model.config.channels ] # calculate list of yields summed per channel - up_yields_sum = np.array( + up_yields_sum = np.stack( [ - np.sum(chan_yields, axis=-1, keepdims=True).tolist() + np.sum(chan_yields, axis=-1, keepdims=True) for chan_yields in up_yields_per_channel ] ) @@ -325,9 +325,9 @@ def yield_stdev( ] # calculate list of yields summed per channel - down_yields_sum = np.array( + down_yields_sum = np.stack( [ - np.sum(chan_yields, axis=-1, keepdims=True).tolist() + np.sum(chan_yields, axis=-1, keepdims=True) for chan_yields in down_yields_per_channel ] ) From 48f1ac08f2a62f26f2d4ab05cdfa4587c0cfe832 Mon Sep 17 00:00:00 2001 From: ekauffma Date: Fri, 30 Jun 2023 11:54:26 +0200 Subject: [PATCH 07/18] *_yields_sum -> *_yields_channel_sum --- src/cabinetry/model_utils.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 0e726641..7629ba4e 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -298,17 +298,17 @@ def yield_stdev( up_comb[:, model.config.channel_slices[ch]] for ch in model.config.channels ] # calculate list of yields summed per channel - up_yields_sum = np.stack( + up_yields_channel_sum = np.stack( [ np.sum(chan_yields, axis=-1, keepdims=True) for chan_yields in up_yields_per_channel ] ) # reshape to get rid of extra axis - up_yields_sum = up_yields_sum.reshape(up_yields_sum.shape[:-1]).T + up_yields_channel_sum = up_yields_channel_sum.reshape(up_yields_channel_sum.shape[:-1]).T # concatenate per-channel sum to up_comb - up_yields = np.concatenate((up_comb, up_yields_sum), axis=1) + up_yields = np.concatenate((up_comb, up_yields_channel_sum), axis=1) # indices: variation, sample, bin up_variations.append(up_yields.tolist()) @@ -325,15 +325,15 @@ def yield_stdev( ] # calculate list of yields summed per channel - down_yields_sum = np.stack( + down_yields_channel_sum = np.stack( [ np.sum(chan_yields, axis=-1, keepdims=True) for chan_yields in down_yields_per_channel ] ) - down_yields_sum = down_yields_sum.reshape(down_yields_sum.shape[:-1]).T + down_yields_channel_sum = down_yields_channel_sum.reshape(down_yields_channel_sum.shape[:-1]).T - down_yields = np.concatenate((down_comb, down_yields_sum), axis=1) + down_yields = np.concatenate((down_comb, down_yields_channel_sum), axis=1) down_variations.append(down_yields) # convert to numpy arrays for further processing From 35937e42adc24408ca79273e32f8a4ec40622300 Mon Sep 17 00:00:00 2001 From: Elliott Kauffman <65742271+ekauffma@users.noreply.github.com> Date: Fri, 30 Jun 2023 11:55:06 +0200 Subject: [PATCH 08/18] Update src/cabinetry/model_utils.py Co-authored-by: Alexander Held <45009355+alexander-held@users.noreply.github.com> --- src/cabinetry/model_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 7629ba4e..9ab14be7 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -304,7 +304,8 @@ def yield_stdev( for chan_yields in up_yields_per_channel ] ) - # reshape to get rid of extra axis + # reshape to drop bin axis, transpose to turn channel axis into new bin axis + # (channel, sample, bin) -> (sample, bin) where "bin" becomes channel sums up_yields_channel_sum = up_yields_channel_sum.reshape(up_yields_channel_sum.shape[:-1]).T # concatenate per-channel sum to up_comb From fd70393eae53191eaa96c20a48be53714d57fa27 Mon Sep 17 00:00:00 2001 From: Elliott Kauffman <65742271+ekauffma@users.noreply.github.com> Date: Fri, 30 Jun 2023 11:55:22 +0200 Subject: [PATCH 09/18] Update src/cabinetry/model_utils.py Co-authored-by: Alexander Held <45009355+alexander-held@users.noreply.github.com> --- src/cabinetry/model_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 9ab14be7..3ef9ca2c 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -338,8 +338,8 @@ def yield_stdev( down_variations.append(down_yields) # convert to numpy arrays for further processing - up_variations_np = np.array(up_variations) - down_variations_np = np.array(down_variations) + up_variations_np = np.asarray(up_variations) + down_variations_np = np.asarray(down_variations) # calculate symmetric uncertainties for all components # indices: variation, channel (last entries sums), sample (last entry sum), bin From 402ab2dad4bd4931078bde4b3e9a11960f8cdea2 Mon Sep 17 00:00:00 2001 From: Elliott Kauffman <65742271+ekauffma@users.noreply.github.com> Date: Fri, 30 Jun 2023 11:55:43 +0200 Subject: [PATCH 10/18] Update src/cabinetry/model_utils.py Co-authored-by: Alexander Held <45009355+alexander-held@users.noreply.github.com> --- src/cabinetry/model_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 3ef9ca2c..71545c53 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -345,8 +345,8 @@ def yield_stdev( # indices: variation, channel (last entries sums), sample (last entry sum), bin sym_uncs = (up_variations_np - down_variations_np) / 2 - # calculate total variance, indexed by sample, bin (per-channel numbers act - # like additional channels with one bin each) + # calculate total variance, indexed by sample, bin (per-channel numbers act like + # additional channels with one bin each) if np.count_nonzero(corr_mat - np.diagflat(np.ones_like(parameters))) == 0: # no off-diagonal contributions from correlation matrix (e.g. pre-fit) total_variance = np.sum(np.power(sym_uncs, 2), axis=0) From 0919f2478fb7f6e2683bcaf9a52c9b7a7f9496c7 Mon Sep 17 00:00:00 2001 From: Elliott Kauffman <65742271+ekauffma@users.noreply.github.com> Date: Fri, 30 Jun 2023 11:55:54 +0200 Subject: [PATCH 11/18] Update src/cabinetry/model_utils.py Co-authored-by: Alexander Held <45009355+alexander-held@users.noreply.github.com> --- src/cabinetry/model_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 71545c53..8e7d1df0 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -308,7 +308,7 @@ def yield_stdev( # (channel, sample, bin) -> (sample, bin) where "bin" becomes channel sums up_yields_channel_sum = up_yields_channel_sum.reshape(up_yields_channel_sum.shape[:-1]).T - # concatenate per-channel sum to up_comb + # concatenate per-channel sums to up_comb (along bin axis) up_yields = np.concatenate((up_comb, up_yields_channel_sum), axis=1) # indices: variation, sample, bin up_variations.append(up_yields.tolist()) From 89f80b6b2945639368256d174782d6e92424d2da Mon Sep 17 00:00:00 2001 From: ekauffma Date: Fri, 30 Jun 2023 12:59:38 +0200 Subject: [PATCH 12/18] rearranged lines which were too long --- src/cabinetry/model_utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 8e7d1df0..22ba4a50 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -306,7 +306,9 @@ def yield_stdev( ) # reshape to drop bin axis, transpose to turn channel axis into new bin axis # (channel, sample, bin) -> (sample, bin) where "bin" becomes channel sums - up_yields_channel_sum = up_yields_channel_sum.reshape(up_yields_channel_sum.shape[:-1]).T + up_yields_channel_sum = up_yields_channel_sum.reshape( + up_yields_channel_sum.shape[:-1] + ).T # concatenate per-channel sums to up_comb (along bin axis) up_yields = np.concatenate((up_comb, up_yields_channel_sum), axis=1) @@ -332,7 +334,9 @@ def yield_stdev( for chan_yields in down_yields_per_channel ] ) - down_yields_channel_sum = down_yields_channel_sum.reshape(down_yields_channel_sum.shape[:-1]).T + down_yields_channel_sum = down_yields_channel_sum.reshape( + down_yields_channel_sum.shape[:-1] + ).T down_yields = np.concatenate((down_comb, down_yields_channel_sum), axis=1) down_variations.append(down_yields) From 4677461e5d83130be9442f0cdadbf2d0685c1d41 Mon Sep 17 00:00:00 2001 From: Elliott Kauffman <65742271+ekauffma@users.noreply.github.com> Date: Fri, 30 Jun 2023 13:00:50 +0200 Subject: [PATCH 13/18] Update src/cabinetry/model_utils.py Co-authored-by: Alexander Held <45009355+alexander-held@users.noreply.github.com> --- src/cabinetry/model_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 22ba4a50..38a7534a 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -361,7 +361,7 @@ def yield_stdev( # variance = sum_i sum_j v[i] * M[i, j] * v[j] # where the product between elements of v again is elementwise (multiplying bin # yields), and the final variance shape is the same as element of v (yield - # uncertainties per bin, sample) + # uncertainties per sample, bin) # possible optimizations that could be considered here: # - skipping staterror-staterror terms for per-bin calculation (orthogonal) From 4b7866a748900e6c22f4fa5caefab57a53714da2 Mon Sep 17 00:00:00 2001 From: ekauffma Date: Tue, 4 Jul 2023 16:30:36 +0200 Subject: [PATCH 14/18] n_bins change and flipped order of bin vs channel calculations --- src/cabinetry/model_utils.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 38a7534a..597b17f2 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -376,18 +376,17 @@ def yield_stdev( # finally perform sums over i and j, remaining indices: sample, bin total_variance = np.sum(np.sum(v_times_m_times_v, axis=1), axis=0) + # get number of bins from model config + n_bins = sum(model.config.channel_nbins.values()) + # convert to standard deviations per bin and per channel - # calculate separation between per-bin indices and per-channel indices - last_bin_index = model.config.channel_slices[model.config.channels[-1]].stop - # indices: (channel, sample) - total_stdev_per_channel = np.sqrt(total_variance[:, last_bin_index:].T).tolist() # indices: (channel, sample, bin) total_stdev_per_bin = [ - np.sqrt( - total_variance[:, :last_bin_index][:, model.config.channel_slices[ch]] - ).tolist() + np.sqrt(total_variance[:, :n_bins][:, model.config.channel_slices[ch]]).tolist() for ch in model.config.channels ] + # indices: (channel, sample) + total_stdev_per_channel = np.sqrt(total_variance[:, n_bins:].T).tolist() # log total stdev per bin / channel (-1 index for sample sum) total_stdev_bin = [ total_stdev_per_bin[i][-1] for i in range(len(total_stdev_per_bin)) From e541e7c72d525ff04b2e7709a68949add10730c2 Mon Sep 17 00:00:00 2001 From: Elliott Kauffman <65742271+ekauffma@users.noreply.github.com> Date: Tue, 4 Jul 2023 16:32:00 +0200 Subject: [PATCH 15/18] Update src/cabinetry/model_utils.py Co-authored-by: Alexander Held <45009355+alexander-held@users.noreply.github.com> --- src/cabinetry/model_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 597b17f2..ac999e93 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -380,6 +380,7 @@ def yield_stdev( n_bins = sum(model.config.channel_nbins.values()) # convert to standard deviations per bin and per channel + # add back outer channel dimension for per-bin uncertainty # indices: (channel, sample, bin) total_stdev_per_bin = [ np.sqrt(total_variance[:, :n_bins][:, model.config.channel_slices[ch]]).tolist() From 3c7f3176fd5f327f85122d9024a56dcb5add5211 Mon Sep 17 00:00:00 2001 From: ekauffma Date: Tue, 4 Jul 2023 16:34:58 +0200 Subject: [PATCH 16/18] added alex's comment about per-channel calculation --- src/cabinetry/model_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index ac999e93..f2a98263 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -386,6 +386,8 @@ def yield_stdev( np.sqrt(total_variance[:, :n_bins][:, model.config.channel_slices[ch]]).tolist() for ch in model.config.channels ] + # per-channel: transpose to flip remaining dimensions (channel sums acted as + # individual bins before) # indices: (channel, sample) total_stdev_per_channel = np.sqrt(total_variance[:, n_bins:].T).tolist() # log total stdev per bin / channel (-1 index for sample sum) From 2332ce5b76297c6398ad1be6075e05141fec3d61 Mon Sep 17 00:00:00 2001 From: Elliott Kauffman <65742271+ekauffma@users.noreply.github.com> Date: Wed, 5 Jul 2023 08:24:06 +0200 Subject: [PATCH 17/18] Update src/cabinetry/model_utils.py Co-authored-by: Alexander Held <45009355+alexander-held@users.noreply.github.com> --- src/cabinetry/model_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index f2a98263..2b7d7ca6 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -390,6 +390,7 @@ def yield_stdev( # individual bins before) # indices: (channel, sample) total_stdev_per_channel = np.sqrt(total_variance[:, n_bins:].T).tolist() + # log total stdev per bin / channel (-1 index for sample sum) total_stdev_bin = [ total_stdev_per_bin[i][-1] for i in range(len(total_stdev_per_bin)) From ab4ceb5dff02bcf7d64d1ebd0d3e13d93f0aefcb Mon Sep 17 00:00:00 2001 From: Elliott Kauffman <65742271+ekauffma@users.noreply.github.com> Date: Wed, 5 Jul 2023 08:24:16 +0200 Subject: [PATCH 18/18] Update src/cabinetry/model_utils.py Co-authored-by: Alexander Held <45009355+alexander-held@users.noreply.github.com> --- src/cabinetry/model_utils.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 2b7d7ca6..0d75fbc8 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -392,13 +392,10 @@ def yield_stdev( total_stdev_per_channel = np.sqrt(total_variance[:, n_bins:].T).tolist() # log total stdev per bin / channel (-1 index for sample sum) - total_stdev_bin = [ - total_stdev_per_bin[i][-1] for i in range(len(total_stdev_per_bin)) - ] + n_channels = len(model.config.channels) + total_stdev_bin = [total_stdev_per_bin[i][-1] for i in range(n_channels)] log.debug(f"total stdev is {total_stdev_bin}") - total_stdev_chan = [ - total_stdev_per_channel[i][-1] for i in range(len(total_stdev_per_channel)) - ] + total_stdev_chan = [total_stdev_per_channel[i][-1] for i in range(n_channels)] log.debug(f"total stdev per channel is {total_stdev_chan}") # save to cache