#functions to quickly cache useful quantities
def _fast_pt(p4):
    """ quick pt calculation for caching """
    return np.hypot(p4.x, p4.y)

def _fast_eta(p4):
    """ quick eta calculation for caching """
    x, y, z = p4.x, p4.y, p4.z
    pt = np.sqrt(x * x + y * y)
    return np.arcsinh(z / pt)

def _fast_phi(p4):
    """ quick phi calculation for caching """
    return np.arctan2(p4.y, p4.x)

def _fast_mass(p4):
    """ quick mass calculation for caching """
    x, y, z, t = p4.x, p4.y, p4.z, p4.t
    p3sq = x * x + y * y + z * z
    # abs() guards against tiny negative values from floating-point cancellation
    return np.sqrt(np.abs(t * t - p3sq))

def _default_match(combs, deltaRCut=10000, deltaPtCut=10000):
    """ default matching function for match(), match in deltaR / deltaPt """
    relDeltaPt = np.abs(combs.i0.pt - combs.i1.pt) / combs.i0.pt
    withinCuts = (combs.i0.delta_r(combs.i1) < deltaRCut) & (relDeltaPt < deltaPtCut)
    return withinCuts.any()
def _default_argmatch(combs, deltaRCut=10000, deltaPtCut=10000):
    """ default matching function for argmatch(), match in deltaR / deltaPt """
    relDeltaPt = np.abs(combs.i0.pt - combs.i1.pt) / combs.i0.pt
    deltaR = combs.i0.delta_r(combs.i1)
    # index of the closest-in-deltaR candidate for each self candidate
    bestIdx = deltaR.argmin()
    bestIdxFlat = bestIdx.flatten(axis=1)
    # best match must also satisfy both cuts to count
    withinCuts = (deltaR[bestIdx] < deltaRCut) & (relDeltaPt[bestIdx] < deltaPtCut)
    withinCutsFlat = withinCuts.flatten(axis=1)
    chosen = bestIdxFlat.flatten()
    # unmatched entries get -1 so the jagged counts structure is preserved
    chosen[~withinCutsFlat.flatten()] = -1
    return awkward.JaggedArray.fromoffsets(withinCutsFlat.offsets, chosen)
+ """ items = kwargs argkeys = items.keys() p4 = None + fast_pt = None + fast_eta = None + fast_phi = None + fast_mass = None if 'p4' in argkeys: p4 = items['p4'] if not isinstance(p4,uproot_methods.TLorentzVectorArray): p4 = uproot_methods.TLorentzVectorArray.from_cartesian(p4[:,0],p4[:,1], p4[:,2],p4[:,3]) + fast_pt = _fast_pt(p4) + fast_eta = _fast_eta(p4) + fast_phi = _fast_phi(p4) + fast_mass = _fast_mass(p4) elif 'pt' in argkeys and 'eta' in argkeys and 'phi' in argkeys and 'mass' in argkeys: p4 = uproot_methods.TLorentzVectorArray.from_ptetaphim(items['pt'],items['eta'], items['phi'],items['mass']) + fast_pt = items['pt'] + fast_eta = items['eta'] + fast_phi = items['phi'] + fast_mass = items['mass'] del items['pt'] del items['eta'] del items['phi'] @@ -53,6 +109,10 @@ def candidatesfromoffsets(cls,offsets,**kwargs): elif 'pt' in argkeys and 'eta' in argkeys and 'phi' in argkeys and 'energy' in argkeys: p4 = uproot_methods.TLorentzVectorArray.from_ptetaphi(items['pt'],items['eta'], items['phi'],items['energy']) + fast_pt = items['pt'] + fast_eta = items['eta'] + fast_phi = items['phi'] + fast_mass = _fast_mass(p4) del items['pt'] del items['eta'] del items['phi'] @@ -60,6 +120,10 @@ def candidatesfromoffsets(cls,offsets,**kwargs): elif 'px' in argkeys and 'py' in argkeys and 'pz' in argkeys and 'mass' in argkeys: p4 = uproot_methods.TLorentzVectorArray.from_xyzm(items['px'],items['py'], items['pz'],items['mass']) + fast_pt = _fast_pt(p4) + fast_eta = _fast_eta(p4) + fast_phi = _fast_phi(p4) + fast_mass = items['mass'] del items['px'] del items['py'] del items['pz'] @@ -67,6 +131,10 @@ def candidatesfromoffsets(cls,offsets,**kwargs): elif 'pt' in argkeys and 'phi' in argkeys and 'pz' in argkeys and 'energy' in argkeys: p4 = uproot_methods.TLorentzVectorArray.from_cylindrical(items['pt'],items['phi'], items['pz'],items['energy']) + fast_pt = items['pt'] + fast_eta = _fast_eta(p4) + fast_phi = items['phi'] + fast_mass = _fast_mass(p4) del items['pt'] del 
items['phi'] del items['pz'] @@ -74,6 +142,10 @@ def candidatesfromoffsets(cls,offsets,**kwargs): elif 'px' in argkeys and 'py' in argkeys and 'pz' in argkeys and 'energy' in argkeys: p4 = uproot_methods.TLorentzVectorArray.from_cartesian(items['px'],items['py'], items['pz'],items['energy']) + fast_pt = _fast_pt(p4) + fast_eta = _fast_eta(p4) + fast_phi = _fast_phi(p4) + fast_mass = _fast_mass(p4) del items['px'] del items['py'] del items['pz'] @@ -81,95 +153,123 @@ def candidatesfromoffsets(cls,offsets,**kwargs): elif 'p' in argkeys and 'theta' in argkeys and 'phi' in argkeys and 'energy' in argkeys: p4 = uproot_methods.TLorentzVectorArray.from_spherical(items['p'],items['theta'], items['phi'],items['energy']) + fast_pt = _fast_pt(p4) + fast_eta = _fast_eta(p4) + fast_phi = items['phi'] + fast_mass = _fast_mass(p4) del items['p'] del items['theta'] del items['phi'] del items['energy'] elif 'p3' in argkeys and 'energy' in argkeys: p4 = uproot_methods.TLorentzVectorArray.from_p3(items['p3'],items['energy']) + fast_pt = _fast_pt(p4) + fast_eta = _fast_eta(p4) + fast_phi = _fast_phi(p4) + fast_mass = _fast_mass(p4) del items['p3'] del items['energy'] else: raise Exception('No valid definition of four-momentum found to build JaggedCandidateArray') items['p4'] = p4 - items['__fast_pt'] = fast_pt(p4) - items['__fast_eta'] = fast_eta(p4) - items['__fast_phi'] = fast_phi(p4) - items['__fast_mass'] = fast_mass(p4) + items['__fast_pt'] = fast_pt + items['__fast_eta'] = fast_eta + items['__fast_phi'] = fast_phi + items['__fast_mass'] = fast_mass return cls.fromoffsets(offsets,awkward.Table(items)) @property def p4(self): + """ return TLorentzVectorArray of candidates """ return self['p4'] @property def pt(self): + """ fast-cache version of pt """ return self['__fast_pt'] @property def eta(self): + """ fast-cache version of eta """ return self['__fast_eta'] @property def phi(self): + """ fast-cache version of phi """ return self['__fast_phi'] @property def mass(self): + """ 
fast-cache version of mass """ return self['__fast_mass'] @property def i0(self): + """ forward i0 from base """ if 'p4' in self['0'].columns: return self.fromjagged(self['0']) return self['0'] @property def i1(self): + """ forward i1 from base """ if 'p4' in self['1'].columns: return self.fromjagged(self['1']) return self['1'] @property def i2(self): + """ forward i2 from base """ if 'p4' in self['2'].columns: return self.fromjagged(self['2']) return self['2'] @property def i3(self): + """ forward i3 from base """ if 'p4' in self['3'].columns: return self.fromjagged(self['3']) return self['3'] @property def i4(self): + """ forward i4 from base """ if 'p4' in self['4'].columns: return self.fromjagged(self['4']) return self['4'] @property def i5(self): + """ forward i5 from base """ if 'p4' in self['5'].columns: return self.fromjagged(self['5']) return self['5'] @property def i6(self): + """ forward i6 from base """ if 'p4' in self['6'].columns: return self.fromjagged(self['6']) return self['6'] @property def i7(self): + """ forward i7 from base """ if 'p4' in self['7'].columns: return self.fromjagged(self['7']) return self['7'] @property def i8(self): + """ forward i8 from base """ if 'p4' in self['8'].columns: return self.fromjagged(self['8']) return self['8'] @property def i9(self): + """ forward i9 from base """ if 'p4' in self['9'].columns: return self.fromjagged(self['9']) return self['9'] def add_attributes(self,**kwargs): + """ + cands.add_attributes( name1 = column1, + name2 = column2, + ... ) + """ for key,item in kwargs.items(): if isinstance(item,awkward.JaggedArray): self[key] = awkward.JaggedArray.fromoffsets(self.offsets,item.flatten()) @@ -177,26 +277,44 @@ def add_attributes(self,**kwargs): self[key] = awkward.JaggedArray.fromoffsets(self.offsets,item) def distincts(self, nested=False): + """ + This method calls the distincts method of JaggedArray to get all unique + pairs per-event contained in a JaggedCandidateArray. 
+ The resulting JaggedArray of that call is dressed with the jagged candidate + array four-momentum and cached fast access pt/eta/phi/mass. + """ outs = super(JaggedCandidateMethods, self).distincts(nested) outs['p4'] = outs.i0['p4'] + outs.i1['p4'] thep4 = outs['p4'] - outs['__fast_pt'] = awkward.JaggedArray.fromoffsets(outs.offsets,fast_pt(thep4.content)) - outs['__fast_eta'] = awkward.JaggedArray.fromoffsets(outs.offsets,fast_eta(thep4.content)) - outs['__fast_phi'] = awkward.JaggedArray.fromoffsets(outs.offsets,fast_phi(thep4.content)) - outs['__fast_mass'] = awkward.JaggedArray.fromoffsets(outs.offsets,fast_mass(thep4.content)) + outs['__fast_pt'] = awkward.JaggedArray.fromoffsets(outs.offsets,_fast_pt(thep4.content)) + outs['__fast_eta'] = awkward.JaggedArray.fromoffsets(outs.offsets,_fast_eta(thep4.content)) + outs['__fast_phi'] = awkward.JaggedArray.fromoffsets(outs.offsets,_fast_phi(thep4.content)) + outs['__fast_mass'] = awkward.JaggedArray.fromoffsets(outs.offsets,_fast_mass(thep4.content)) return self.fromjagged(outs) def pairs(self, nested=False): + """ + This method calls the pairs method of JaggedArray to get all pairs + per-event contained in a JaggedCandidateArray. + The resulting JaggedArray of that call is dressed with the jagged candidate + array four-momentum and cached fast access pt/eta/phi/mass. 
+ """ outs = super(JaggedCandidateMethods, self).pairs(nested) outs['p4'] = outs.i0['p4'] + outs.i1['p4'] thep4 = outs['p4'] - outs['__fast_pt'] = awkward.JaggedArray.fromoffsets(outs.offsets,fast_pt(thep4.content)) - outs['__fast_eta'] = awkward.JaggedArray.fromoffsets(outs.offsets,fast_eta(thep4.content)) - outs['__fast_phi'] = awkward.JaggedArray.fromoffsets(outs.offsets,fast_phi(thep4.content)) - outs['__fast_mass'] = awkward.JaggedArray.fromoffsets(outs.offsets,fast_mass(thep4.content)) + outs['__fast_pt'] = awkward.JaggedArray.fromoffsets(outs.offsets,_fast_pt(thep4.content)) + outs['__fast_eta'] = awkward.JaggedArray.fromoffsets(outs.offsets,_fast_eta(thep4.content)) + outs['__fast_phi'] = awkward.JaggedArray.fromoffsets(outs.offsets,_fast_phi(thep4.content)) + outs['__fast_mass'] = awkward.JaggedArray.fromoffsets(outs.offsets,_fast_mass(thep4.content)) return self.fromjagged(outs) def cross(self, other, nested=False): + """ + This method calls the cross method of JaggedArray to get all pairs + per-event with another JaggedCandidateArray. + The resulting JaggedArray of that call is dressed with the jagged candidate + array four-momentum and cached fast access pt/eta/phi/mass. + """ outs = super(JaggedCandidateMethods, self).cross(other,nested) #currently JaggedArray.cross() has some funny behavior when it encounters the # p4 column and makes some wierd new column... 
for now I just delete it and reorder @@ -223,41 +341,46 @@ def cross(self, other, nested=False): else: outs['p4'] = outs['p4'] + outs[key]['p4'] thep4 = outs['p4'] - outs['__fast_pt'] = awkward.JaggedArray.fromoffsets(outs.offsets,fast_pt(thep4.content)) - outs['__fast_eta'] = awkward.JaggedArray.fromoffsets(outs.offsets,fast_eta(thep4.content)) - outs['__fast_phi'] = awkward.JaggedArray.fromoffsets(outs.offsets,fast_phi(thep4.content)) - outs['__fast_mass'] = awkward.JaggedArray.fromoffsets(outs.offsets,fast_mass(thep4.content)) + outs['__fast_pt'] = awkward.JaggedArray.fromoffsets(outs.offsets,_fast_pt(thep4.content)) + outs['__fast_eta'] = awkward.JaggedArray.fromoffsets(outs.offsets,_fast_eta(thep4.content)) + outs['__fast_phi'] = awkward.JaggedArray.fromoffsets(outs.offsets,_fast_phi(thep4.content)) + outs['__fast_mass'] = awkward.JaggedArray.fromoffsets(outs.offsets,_fast_mass(thep4.content)) return self.fromjagged(outs) #Function returns a mask with true or false at each location for whether object 1 matched with any object 2s #Optional parameter to add a cut on the percent pt difference between the objects - def match(self, cands, deltaRCut,deltaPtCut=10000): + def _matchcombs(self,cands): + """ + Wrapper function that returns all p4 combinations of this JaggedCandidateArray + with another input JaggedCandidateArray. 
+ """ combinations = self.p4.cross(cands.p4, nested=True) if((~(combinations.i0.pt >0).flatten().flatten().all())|(~(combinations.i1.pt >0).flatten().flatten().all()) ): raise Exception("At least one particle has pt = 0") - passPtCut =(( abs(combinations.i0.pt - combinations.i1.pt)/combinations.i0.pt ) < deltaPtCut) - mask = (combinations.i0.delta_r(combinations.i1) < deltaRCut)&passPtCut - return mask.any() + return combinations + + def match(self, cands, matchfunc=_default_match, **kwargs): + """ returns a mask of candidates that pass matchfunc() """ + combinations = self._matchcombs(cands) + return matchfunc(combinations,**kwargs) #Function returns a fancy indexing. #At each object 1 location is the index of object 2 that it matched best with - #Optional parameter to return an empty list if the best match is not within the deltaRCut - def argmatch(self, cands, deltaRCut=10000, deltaPtCut=10000): - combinations = self.p4.cross(cands.p4, nested=True) - if((~(combinations.i0.pt >0).flatten().flatten().all())|(~(combinations.i1.pt >0).flatten().flatten().all()) ): - raise Exception("At least one particle has pt = 0") - deltaPts = ( abs(combinations.i0.pt - combinations.i1.pt)/combinations.i0.pt ) - deltaRs = combinations.i0.delta_r(combinations.i1) - indexOfMin = deltaRs.argmin() - indexOfMinOutShape = indexOfMin.flatten(axis=1) - passesCut = (deltaRs[indexOfMin] < deltaRCut)&(deltaPts[indexOfMin] < deltaPtCut) - passesCutOutShape = passesCut.flatten(axis=1) - flatPass = passesCutOutShape.flatten() - flatIdxMin = indexOfMinOutShape.flatten() - flatIdxMin[~flatPass] = -1 - return awkward.JaggedArray.fromoffsets(passesCutOutShape.offsets,flatIdxMin) + #<<<>>> selves without a match will get a -1 to preserve counts structure + def argmatch(self, cands, argmatchfunc=_default_argmatch, **kwargs): + """ + returns a jagged array of indices that pass argmatchfunc. 
def _checkConsistency(against, tocheck):
    """ ensure successive correctors agree on a tag; returns the checked value """
    if against is None:
        against = tocheck
    else:
        if against != tocheck:
            # bugfix: the original joined these two literals with '/', which
            # raised TypeError instead of formatting the intended message
            raise Exception('Corrector for {} is mixed '
                            'with correctors for {}!'.format(tocheck, against))
    return tocheck

_levelre = re.compile('[L1-7]+')
def _getLevel(levelName):
    """ extract the JEC level token (e.g. 'L1', 'L2L3') from a level name """
    matches = _levelre.findall(levelName)
    if len(matches) > 1:
        raise Exception('Malformed JEC level name: {}'.format(levelName))
    return matches[0]

_level_order = ['L1', 'L2', 'L3', 'L2L3']

class FactorizedJetCorrector(object):
    """
    This class is a columnar implementation of the FactorizedJetCorrector tool in
    CMSSW and FWLite. It applies a series of JECs in ascending order as defined by
    '_level_order', and checks for the consistency of input corrections.
    You can use this class as follows:
        fjc = FactorizedJetCorrector(name1=corrL1,...)
        jetCorrs = fjc(JetParameter1=jet.parameter1,...)
    """
    def __init__(self, **kwargs):
        """
        You construct a FactorizedJetCorrector by passing in a dict of names and functions.
        Names must be formatted as '<campaign>_<dataera>_<datatype>_<level>_<jettype>'.
        Raises if the correctors mix campaigns/eras/types or no levels are given.
        """
        jettype = None
        levels = []
        funcs = []
        datatype = None
        campaign = None
        dataera = None
        for name, func in kwargs.items():
            if not isinstance(func, jme_standard_function):
                raise Exception('{} is a {} and not a jme_standard_function!'.format(name,
                                                                                    type(func)))
            info = name.split('_')
            if len(info) != 5:
                raise Exception('Corrector name is not properly formatted!')

            campaign = _checkConsistency(campaign, info[0])
            dataera = _checkConsistency(dataera, info[1])
            datatype = _checkConsistency(datatype, info[2])
            levels.append(info[3])
            funcs.append(func)
            jettype = _checkConsistency(jettype, info[4])

        if campaign is None:
            raise Exception('Unable to determine production campaign of JECs!')
        self._campaign = campaign

        if dataera is None:
            raise Exception('Unable to determine data era of JECs!')
        self._dataera = dataera

        if datatype is None:
            raise Exception('Unable to determine if JECs are for MC or Data!')
        self._datatype = datatype

        if len(levels) == 0:
            raise Exception('No levels provided?')
        self._levels = levels
        self._funcs = funcs

        if jettype is None:
            raise Exception('Unable to determine type of jet to correct!')
        self._jettype = jettype

        # sort levels (and their functions) into canonical application order
        # (bugfix: the original compared the loop index to the level *name*,
        #  which is always unequal, and its single-pass swap did not sort anyway)
        order = sorted(range(len(self._levels)),
                       key=lambda i: _level_order.index(_getLevel(self._levels[i])))
        self._levels = [self._levels[i] for i in order]
        self._funcs = [self._funcs[i] for i in order]

        #now we setup the call signature for this factorized JEC
        self._signature = []
        for func in self._funcs:
            for inp in func.signature:
                if inp not in self._signature:
                    self._signature.append(inp)

    @property
    def signature(self):
        """ list the necessary jet properties that must be input to this function """
        return self._signature

    def __repr__(self):
        out = 'campaign : %s\n'%(self._campaign)
        out += 'data era : %s\n'%(self._dataera)
        out += 'data type : %s\n'%(self._datatype)
        out += 'jet type : %s\n'%(self._jettype)
        out += 'levels : %s\n'%(','.join(self._levels))
        out += 'signature : (%s)\n'%(','.join(self._signature))
        return out

    def getCorrection(self, **kwargs):
        """
        Returns the set of corrections for all input jets at the highest available level
        use like:
            jecs = corrector.getCorrection(JetProperty1=jet.property1,...)
        """
        return self.getSubCorrections(**kwargs)[-1]

    def __call__(self, **kwargs):
        """ alias for getCorrection(), matching the usage advertised in the class docstring """
        return self.getCorrection(**kwargs)

    def getSubCorrections(self, **kwargs):
        """
        Returns the set of corrections for all input jets broken down by level
        use like:
            jecs = corrector.getSubCorrections(JetProperty1=jet.property1,...)
        'jecs' will be formatted like [[jec_jet1 jec_jet2 ...] ...]
        """
        firstarg = kwargs[self._signature[0]]
        offsets = None
        # copy the inputs to flat arrays so that applying each level's correction
        # to JetPt/JetE between steps does not write through to the caller's data
        # (bugfix: the original aliased kwargs and mutated the inputs in place)
        if hasattr(firstarg, 'offsets'):  # jagged input (awkward JaggedArray-like)
            offsets = firstarg.offsets
            localargs = {key: np.array(val.content) for key, val in kwargs.items()}
        else:
            localargs = {key: np.array(val) for key, val in kwargs.items()}
        corrVars = [var for var in ('JetPt', 'JetE') if var in localargs]
        if len(corrVars) == 0:
            raise Exception('No variable to correct, need JetPt or JetE in inputs!')
        cumulativeCorrection = np.ones_like(localargs[self._signature[0]])
        corrections = []
        for func in self._funcs:
            args = tuple(localargs[inp] for inp in func.signature)
            corr = func(*args)
            # apply this level before evaluating the next one
            for var in corrVars:
                localargs[var] = localargs[var] * corr
            # bugfix: the original multiplied in place and appended the *same*
            # ndarray each level, so every returned entry aliased the final value
            cumulativeCorrection = cumulativeCorrection * corr
            corrections.append(cumulativeCorrection)
        if offsets is not None:
            corrections = [JaggedArray.fromoffsets(offsets, corr) for corr in corrections]
        return corrections
def _checkConsistency(against, tocheck):
    """ ensure successive correctors agree on a tag; returns the checked value """
    if against is None:
        against = tocheck
    else:
        if against != tocheck:
            # bugfix: the original joined these two literals with '/', which
            # raised TypeError instead of formatting the intended message
            raise Exception('Corrector for {} is mixed '
                            'with correctors for {}!'.format(tocheck, against))
    return tocheck

_levelre = re.compile('Uncertainty')
def _getLevel(levelName):
    """ extract the 'Uncertainty' level token from a level name """
    matches = _levelre.findall(levelName)
    if len(matches) != 1:
        raise Exception('Malformed JUNC level name: {}'.format(levelName))
    return matches[0]

_level_order = ['Uncertainty']

class JetCorrectionUncertainty(object):
    """
    This class is a columnar implementation of the JetCorrectionUncertainty tool in
    CMSSW and FWLite. It calculates the jet energy scale uncertainty for a corrected jet
    in a given binning.
    You can use this class as follows:
        jcu = JetCorrectionUncertainty(name1=corrL1,...)
        jetUncs = jcu(JetParameter1=jet.parameter1,...)
    """
    def __init__(self, **kwargs):
        """
        You construct a JetCorrectionUncertainty by passing in a dict of names and functions.
        Names must be formatted as '<campaign>_<dataera>_<datatype>_<level>_<jettype>'.
        Raises if the correctors mix campaigns/eras/types or no levels are given.
        """
        jettype = None
        levels = []
        funcs = []
        datatype = None
        campaign = None
        dataera = None
        for name, func in kwargs.items():
            if not isinstance(func, jec_uncertainty_lookup):
                raise Exception('{} is a {} and not a jec_uncertainty_lookup!'.format(name,
                                                                                      type(func)))
            info = name.split('_')
            if len(info) != 5:
                raise Exception('Corrector name is not properly formatted!')

            campaign = _checkConsistency(campaign, info[0])
            dataera = _checkConsistency(dataera, info[1])
            datatype = _checkConsistency(datatype, info[2])
            levels.append(info[3])
            funcs.append(func)
            jettype = _checkConsistency(jettype, info[4])

        if campaign is None:
            raise Exception('Unable to determine production campaign of JECs!')
        self._campaign = campaign

        if dataera is None:
            raise Exception('Unable to determine data era of JECs!')
        self._dataera = dataera

        if datatype is None:
            raise Exception('Unable to determine if JECs are for MC or Data!')
        self._datatype = datatype

        if len(levels) == 0:
            raise Exception('No levels provided?')
        self._levels = levels
        self._funcs = funcs

        if jettype is None:
            raise Exception('Unable to determine type of jet to correct!')
        self._jettype = jettype

        # sort levels (and their functions) into canonical order
        # (bugfix: the original compared the loop index to the level *name*)
        order = sorted(range(len(self._levels)),
                       key=lambda i: _level_order.index(_getLevel(self._levels[i])))
        self._levels = [self._levels[i] for i in order]
        self._funcs = [self._funcs[i] for i in order]

        #now we setup the call signature for this uncertainty provider
        self._signature = []
        for func in self._funcs:
            for inp in func.signature:
                if inp not in self._signature:
                    self._signature.append(inp)

    @property
    def signature(self):
        """ list the necessary jet properties that must be input to this function """
        return self._signature

    def __repr__(self):
        out = 'campaign : %s\n'%(self._campaign)
        out += 'data era : %s\n'%(self._dataera)
        out += 'data type : %s\n'%(self._datatype)
        out += 'jet type : %s\n'%(self._jettype)
        out += 'levels : %s\n'%(','.join(self._levels))
        out += 'signature : (%s)\n'%(','.join(self._signature))
        return out

    def getUncertainty(self, **kwargs):
        """
        Returns the set of uncertainties for all input jets at the highest available level
        use like:
            juncs = uncertainty.getUncertainty(JetProperty1=jet.property1,...)
        'juncs' will be formatted like [[[up_val down_val]_jet1 ... ] ...]
        """
        uncs = []
        for func in self._funcs:
            args = tuple(kwargs[inp] for inp in func.signature)
            uncs.append(func(*args))
        return uncs[-1]

    def __call__(self, **kwargs):
        """ alias for getUncertainty(), matching the usage advertised in the class docstring """
        return self.getUncertainty(**kwargs)
def _checkConsistency(against, tocheck):
    """ ensure successive correctors agree on a tag; returns the checked value """
    if against is None:
        against = tocheck
    else:
        if against != tocheck:
            # bugfix: the original joined these two literals with '/', which
            # raised TypeError instead of formatting the intended message
            raise Exception('Corrector for {} is mixed '
                            'with correctors for {}!'.format(tocheck, against))
    return tocheck

_levelre = re.compile('Resolution')
def _getLevel(levelName):
    """ extract the 'Resolution' level token from a level name """
    matches = _levelre.findall(levelName)
    if len(matches) > 1:
        raise Exception('Malformed JEC level name: {}'.format(levelName))
    return matches[0]

_level_order = ['Resolution']

class JetResolution(object):
    """
    This class is a columnar implementation of the JetResolution tool in
    CMSSW and FWLite. It calculates the jet energy resolution for a corrected jet
    in a given binning.
    You can use this class as follows:
        jr = JetResolution(name1=corrL1,...)
        jetRes = jr(JetParameter1=jet.parameter1,...)
    """
    def __init__(self, **kwargs):
        """
        You construct a JetResolution by passing in a dict of names and functions.
        Names must be formatted as '<campaign>_<dataera>_<datatype>_<level>_<jettype>'.
        Raises if the correctors mix campaigns/eras/types or no levels are given.
        """
        jettype = None
        levels = []
        funcs = []
        datatype = None
        campaign = None
        dataera = None
        for name, func in kwargs.items():
            if not isinstance(func, jme_standard_function):
                raise Exception('{} is a {} and not a jme_standard_function!'.format(name,
                                                                                    type(func)))
            info = name.split('_')
            if len(info) != 5:
                raise Exception('Corrector name is not properly formatted!')

            campaign = _checkConsistency(campaign, info[0])
            dataera = _checkConsistency(dataera, info[1])
            datatype = _checkConsistency(datatype, info[2])
            levels.append(info[3])
            funcs.append(func)
            jettype = _checkConsistency(jettype, info[4])

        if campaign is None:
            raise Exception('Unable to determine production campaign of JECs!')
        self._campaign = campaign

        if dataera is None:
            raise Exception('Unable to determine data era of JECs!')
        self._dataera = dataera

        if datatype is None:
            raise Exception('Unable to determine if JECs are for MC or Data!')
        self._datatype = datatype

        if len(levels) == 0:
            raise Exception('No levels provided?')
        self._levels = levels
        self._funcs = funcs

        if jettype is None:
            raise Exception('Unable to determine type of jet to correct!')
        self._jettype = jettype

        # sort levels (and their functions) into canonical order
        # (bugfix: the original compared the loop index to the level *name*)
        order = sorted(range(len(self._levels)),
                       key=lambda i: _level_order.index(_getLevel(self._levels[i])))
        self._levels = [self._levels[i] for i in order]
        self._funcs = [self._funcs[i] for i in order]

        #now we setup the call signature for this resolution provider
        self._signature = []
        for func in self._funcs:
            for inp in func.signature:
                if inp not in self._signature:
                    self._signature.append(inp)

    @property
    def signature(self):
        """ list the necessary jet properties that must be input to this function """
        return self._signature

    def __repr__(self):
        out = 'campaign : %s\n'%(self._campaign)
        out += 'data era : %s\n'%(self._dataera)
        out += 'data type : %s\n'%(self._datatype)
        out += 'jet type : %s\n'%(self._jettype)
        out += 'levels : %s\n'%(','.join(self._levels))
        out += 'signature : (%s)\n'%(','.join(self._signature))
        return out

    def getResolution(self, **kwargs):
        """
        Returns the set of resolutions for all input jets at the highest available level
        use like:
            jrs = reso.getResolution(JetProperty1=jet.property1,...)
        'jrs' will be formatted like [[jr_jet1 jr_jet2 ...] ...]
        """
        resos = []
        for func in self._funcs:
            args = tuple(kwargs[inp] for inp in func.signature)
            resos.append(func(*args))
        return resos[-1]

    def __call__(self, **kwargs):
        """ alias for getResolution(), matching the usage advertised in the class docstring """
        return self.getResolution(**kwargs)
def _checkConsistency(against, tocheck):
    """ ensure successive correctors agree on a tag; returns the checked value """
    if against is None:
        against = tocheck
    else:
        if against != tocheck:
            # bugfix: the original joined these two literals with '/', which
            # raised TypeError instead of formatting the intended message
            raise Exception('Corrector for {} is mixed '
                            'with correctors for {}!'.format(tocheck, against))
    return tocheck

_levelre = re.compile('SF+')
def _getLevel(levelName):
    """ extract the 'SF' level token from a level name """
    matches = _levelre.findall(levelName)
    if len(matches) > 1:
        raise Exception('Malformed JERSF level name: {}'.format(levelName))
    return matches[0]

_level_order = ['SF']

class JetResolutionScaleFactor(object):
    """
    This class is a columnar implementation of the JetResolutionScaleFactor tool in
    CMSSW and FWLite. It calculates the jet energy resolution scale factor for a
    corrected jet in a given binning.
    You can use this class as follows:
        jersf = JetResolutionScaleFactor(name1=corrL1,...)
        jetResSF = jersf(JetParameter1=jet.parameter1,...)
    """
    def __init__(self, **kwargs):
        """
        You construct a JetResolutionScaleFactor by passing in a dict of names and functions.
        Names must be formatted as '<campaign>_<dataera>_<datatype>_<level>_<jettype>'.
        Raises if the correctors mix campaigns/eras/types or no levels are given.
        """
        jettype = None
        levels = []
        funcs = []
        datatype = None
        campaign = None
        dataera = None
        for name, func in kwargs.items():
            if not isinstance(func, jersf_lookup):
                raise Exception('{} is a {} and not a jersf_lookup!'.format(name, type(func)))
            info = name.split('_')
            if len(info) != 5:
                raise Exception('Corrector name is not properly formatted!')

            campaign = _checkConsistency(campaign, info[0])
            dataera = _checkConsistency(dataera, info[1])
            datatype = _checkConsistency(datatype, info[2])
            levels.append(info[3])
            funcs.append(func)
            jettype = _checkConsistency(jettype, info[4])

        if campaign is None:
            raise Exception('Unable to determine production campaign of JECs!')
        self._campaign = campaign

        if dataera is None:
            raise Exception('Unable to determine data era of JECs!')
        self._dataera = dataera

        if datatype is None:
            raise Exception('Unable to determine if JECs are for MC or Data!')
        self._datatype = datatype

        if len(levels) == 0:
            raise Exception('No levels provided?')
        self._levels = levels
        self._funcs = funcs

        if jettype is None:
            raise Exception('Unable to determine type of jet to correct!')
        self._jettype = jettype

        # sort levels (and their functions) into canonical order
        # (bugfix: the original compared the loop index to the level *name*)
        order = sorted(range(len(self._levels)),
                       key=lambda i: _level_order.index(_getLevel(self._levels[i])))
        self._levels = [self._levels[i] for i in order]
        self._funcs = [self._funcs[i] for i in order]

        #now we setup the call signature for this scale-factor provider
        self._signature = []
        for func in self._funcs:
            for inp in func.signature:
                if inp not in self._signature:
                    self._signature.append(inp)

    @property
    def signature(self):
        """ list the necessary jet properties that must be input to this function """
        return self._signature

    def __repr__(self):
        out = 'campaign : %s\n'%(self._campaign)
        out += 'data era : %s\n'%(self._dataera)
        out += 'data type : %s\n'%(self._datatype)
        out += 'jet type : %s\n'%(self._jettype)
        out += 'levels : %s\n'%(','.join(self._levels))
        out += 'signature : (%s)\n'%(','.join(self._signature))
        return out

    def getScaleFactor(self, **kwargs):
        """
        Returns the set of resolution scale factors for all input jets at the highest
        available level
        use like:
            jersfs = jersf.getScaleFactor(JetProperty1=jet.property1,...)
        'jersfs' will be formatted like [[[up_val central_cal down_val]_jet1 ...] ...]
        """
        sfs = []
        for func in self._funcs:
            args = tuple(kwargs[inp] for inp in func.signature)
            sfs.append(func(*args))
        return sfs[-1]

    def __call__(self, **kwargs):
        """ alias for getScaleFactor(), matching the usage advertised in the class docstring """
        return self.getScaleFactor(**kwargs)
def _update_jet_ptm(corr, jet, fromRaw=False):
    """
    In-place hack: overwrite the cached '__fast_pt' and '__fast_mass' columns
    of a JaggedCandidateArray with corrected values as corrections and
    smearings are applied.
    When fromRaw is True the raw pt/mass are scaled, otherwise the current
    pt/mass are scaled.
    """
    pt_src, mass_src = (jet.ptRaw, jet.massRaw) if fromRaw else (jet.pt, jet.mass)
    jet._content._contents['__fast_pt'] = corr * pt_src.content
    jet._content._contents['__fast_mass'] = corr * mass_src.content
+ xformer.transform(jet) + """ + def __init__(self,jec=None,junc=None,jer=None,jersf=None): + if jec is None: + raise Exception('JetTransformer must have "jec" specified as an argument!') + if not isinstance(jec,FactorizedJetCorrector): + raise Exception('JetTransformer needs a FactorizedJetCorrecter passed as "jec"'+ + ' got object of type {}'.format(type(jec))) + self._jec = jec + + if junc is not None and not isinstance(junc,JetCorrectionUncertainty): + raise Exception('"junc" must be of type "JetCorrectionUncertainty"'+ + ' got {}'.format(type(junc))) + self._junc = junc + + if (jer is None) != (jersf is None): + raise Exception('Cannot apply JER-SF without an input JER, and vice-versa!') + + if jer is not None and not isinstance(jer,JetResolution): + raise Exception('"jer" must be of type "JetResolution"'+ + ' got {}'.format(type(jer))) + self._jer = jer + + if jersf is not None and not isinstance(jersf,JetResolutionScaleFactor): + raise Exception('"jersf" must be of type "JetResolutionScaleFactor"'+ + ' got {}'.format(type(jersf))) + self._jersf = jersf + + def transform(self,jet): + """ + precondition - jet is a JaggedCandidateArray with additional attributes: + - 'ptRaw' + - 'massRaw' + xformer = JetTransformer(name1=corrL1,...) 
    def transform(self,jet):
        """
        precondition - jet is a JaggedCandidateArray with additional attributes:
            - 'ptRaw'
            - 'massRaw'
        xformer = JetTransformer(name1=corrL1,...)
        xformer.transform(jet)
        postcondition - jet.pt, jet.mass, jet.p4 are updated to represent the corrected jet
                        based on the input correction set
        """
        if not isinstance(jet,JaggedCandidateArray):
            raise Exception('Input data must be a JaggedCandidateArray!')
        if ( 'ptRaw' not in jet.columns or 'massRaw' not in jet.columns ):
            raise Exception('Input JaggedCandidateArray must have "ptRaw" & "massRaw"!')

        #initialize the jet momenta to raw values
        _update_jet_ptm(1.0,jet,fromRaw=True)

        #below we work in numpy arrays, JaggedCandidateArray knows how to convert them
        args = {key:getattr(jet,_signature_map[key]).content for key in self._jec.signature}
        jec = self._jec.getCorrection(**args)

        # apply the full JEC on top of the raw momenta
        _update_jet_ptm(jec,jet,fromRaw=True)

        # default: unit up/down variations when no uncertainty set is supplied
        junc_up = np.ones_like(jec)
        junc_down = np.ones_like(jec)
        if self._junc is not None:
            args = {key:getattr(jet,_signature_map[key]).content for key in self._junc.signature}
            junc = self._junc.getUncertainty(**args)
            # column 0 carries the up variation (1+unc), column 1 the down (1-unc)
            junc_up = junc[:,0]
            junc_down = junc[:,1]

        #if there's a jer and sf to apply we have to update the momentum too
        #right now only use stochastic smearing
        if self._jer is not None and self._jersf is not None:
            args = {key:getattr(jet,_signature_map[key]).content for key in self._jer.signature}
            jer = self._jer.getResolution(**args)

            args = {key:getattr(jet,_signature_map[key]).content for key in self._jersf.signature}
            jersf = self._jersf.getScaleFactor(**args)

            # stochastic smearing factor: 1 + N(0,1) * jer * sqrt(SF^2 - 1)
            # columns of jersf are [central, up, down]
            # NOTE(review): sqrt produces NaN when SF < 1 -- confirm inputs guarantee SF >= 1
            jsmear_cen = 1. + np.sqrt(jersf[:, 0]**2 - 1.0)*jer*np.random.normal(size=jer.size)
            jsmear_up = 1. + np.sqrt(jersf[:, 1]**2 - 1.0)*jer*np.random.normal(size=jer.size)
            jsmear_down = 1. + np.sqrt(jersf[:,-1]**2 - 1.0)*jer*np.random.normal(size=jer.size)

            # need to apply up and down jer-smear before applying central correction
            jet.add_attributes( pt_jer_up = jsmear_up * jet.pt.content,
                                mass_jer_up = jsmear_up * jet.mass.content,
                                pt_jer_down = jsmear_down * jet.pt.content,
                                mass_jer_down = jsmear_down * jet.mass.content )
            #finally, update the central value
            _update_jet_ptm(jsmear_cen,jet)

        # have to apply central jersf before calculating junc
        if self._junc is not None:
            jet.add_attributes( pt_jes_up = junc_up * jet.pt.content,
                                mass_jes_up = junc_up * jet.mass.content,
                                pt_jes_down = junc_down * jet.pt.content,
                                mass_jes_down = junc_down * jet.mass.content )

        #hack to update the jet p4, we have the fully updated pt and mass here
        jet._content._contents['p4'] = TLorentzVectorArray.from_ptetaphim( jet.pt.content,
                                                                           jet.eta.content,
                                                                           jet.phi.content,
                                                                           jet.mass.content )
Stored look ups can be indexed by + name and then called through an overloaded __call__ function. + Example: + evaluate = extractor.make_evaluator() + vals = evaluate[XYZ](arg1,arg2,...) + The returned 'vals' has the same shape as the input args. + + lookup_types is a map of possible contructors for extracted data + """ def __init__(self,names,types,primitives): + """ + initialize the evaluator from a list of inputs names, + lookup types, and input primitive data + """ self._functions = {} for key in names.keys(): lookup_type = types[names[key]] @@ -20,7 +36,11 @@ def __init__(self,names,types,primitives): self._functions[key] = lookup_types[lookup_type](*lookup_def) def __dir__(self): + """ dir is overloaded to list all available functions + in the evaluator + """ return self._functions.keys() def __getitem__(self, key): + """ return a function named 'key' """ return self._functions[key] diff --git a/fnal_column_analysis_tools/lookup_tools/extractor.py b/fnal_column_analysis_tools/lookup_tools/extractor.py index c5a0fa455..5ff96c40e 100644 --- a/fnal_column_analysis_tools/lookup_tools/extractor.py +++ b/fnal_column_analysis_tools/lookup_tools/extractor.py @@ -18,10 +18,23 @@ 'jec':convert_jec_txt_file, 'jersf':convert_jersf_txt_file, 'jr':convert_jr_txt_file, - 'junc':convert_junc_txt_file} + 'junc':convert_junc_txt_file, + 'ea':convert_effective_area_file} } class extractor(object): + """ + This class defines a common entry point for defining functions that extract + the inputs to build lookup tables from various kinds of files. + The files that can be converted are presently defined in the "file_converters" dict. + The file names are used to determine the converter that is used, i.e.: + something.TYPE.FORMAT will apply the TYPE extractor to a file of given FORMAT + If there is no file type specifier the 'default' value is used. 
+ You can add sets of lookup tables / weights by calling: + extractor.add_weight_set() + is formatted like ' ' + '*' can be used as a wildcard to import all available lookup tables in a file + """ def __init__(self): self._weights = [] self._names = {} @@ -30,6 +43,7 @@ def __init__(self): self._finalized = False def add_weight_set(self,local_name,type,weights): + """ adds one extracted weight to the extractor """ if self._finalized: raise Exception('extractor is finalized cannot add new weights!') if local_name in self._names.keys(): @@ -39,8 +53,10 @@ def add_weight_set(self,local_name,type,weights): self._weights.append(weights) def add_weight_sets(self,weightsdescs): - # expect file to be formatted - # allow * * and * to do easy imports of whole file + """ + expects a list of text lines to be formatted as ' ' + allows * * and * to do easy imports of whole file + """ for weightdesc in weightsdescs: if weightdesc[0] == '#': continue #skip comment lines temp = weightdesc.strip().split(' ') @@ -60,6 +76,7 @@ def add_weight_sets(self,weightsdescs): self.add_weight_set(local_name,type,weights) def import_file(self,file): + """ cache the whole contents of a file for later processing """ if file not in self._filecache.keys(): file_dots = file.split('.') format = file_dots[-1].strip() @@ -68,7 +85,8 @@ def import_file(self,file): type = file_dots[-2] self._filecache[file] = file_converters[format][type](file) - def extract_from_file(self, file, name): + def extract_from_file(self, file, name): + """ import a file and then extract a lookup set """ self.import_file(file) weights = self._filecache[file] names = {key[0]:key[1] for key in weights.keys()} @@ -77,16 +95,14 @@ def extract_from_file(self, file, name): raise Exception('Weights named "{}" not in {}!'.format(name,file)) return (weights[(bname,names[bname])],names[bname]) - def finalize(self): + def finalize(self,reduce_list=None): + """ + stop any further imports and if provided pare down + the stored histograms to 
def masked_bin_eval(dim1_indices, dimN_bins, dimN_vals):
    """
    Find the bin index in a second (nested) dimension for each value.

    dim1_indices selects which edge set dimN_bins[i] applies to each entry of
    dimN_vals; the returned index is clipped to a valid bin of that edge set.
    """
    out = np.empty_like(dim1_indices)
    for first_bin in np.unique(dim1_indices):
        members = np.where(dim1_indices == first_bin)
        edges = dimN_bins[first_bin]
        raw = np.searchsorted(edges, dimN_vals[members], side='right') - 1
        out[members] = np.clip(raw, 0, len(edges) - 2)
    return out
= self._eval_ups[i](eval_vals[self._eval_vars[0]][dim1_indices==i]) - eval_downs[dim1_indices==i] = self._eval_downs[i](eval_vals[self._eval_vars[0]][dim1_indices==i]) + mask = np.where(dim1_indices==i) + vals = np.clip(eval_vals[self._eval_vars[0]][mask], + self._eval_knots[0],self._eval_knots[-1]) + outs[:,0][mask] += self._eval_ups[i](vals) + outs[:,1][mask] -= self._eval_downs[i](vals) - central = np.ones_like(eval_ups) - - return np.vstack((central,1.+eval_ups,1.-eval_downs)).T + return outs + + @property + def signature(self): + """ the list of all needed jet properties to be passed as kwargs to this lookup """ + return self._signature def __repr__(self): out = 'binned dims : %s\n'%(self._dim_order) diff --git a/fnal_column_analysis_tools/lookup_tools/jersf_lookup.py b/fnal_column_analysis_tools/lookup_tools/jersf_lookup.py index 2b4ddc3fd..3c277ba70 100644 --- a/fnal_column_analysis_tools/lookup_tools/jersf_lookup.py +++ b/fnal_column_analysis_tools/lookup_tools/jersf_lookup.py @@ -3,23 +3,31 @@ import numpy as np from awkward.array.jagged import JaggedArray from copy import deepcopy -import numba -from numpy import sqrt,log -from numpy import maximum as max -from numpy import minimum as min - -@numba.njit def masked_bin_eval(dim1_indices, dimN_bins, dimN_vals): dimN_indices = np.empty_like(dim1_indices) for i in np.unique(dim1_indices): - dimN_indices[dim1_indices==i] = np.searchsorted(dimN_bins[i],dimN_vals[dim1_indices==i],side='right') - dimN_indices[dim1_indices==i] = min(dimN_indices[dim1_indices==i]-1,len(dimN_bins[i])-1) - dimN_indices[dim1_indices==i] = max(dimN_indices[dim1_indices==i]-1,0) + idx = np.where(dim1_indices==i) + dimN_indices[idx] = np.clip(np.searchsorted(dimN_bins[i], + dimN_vals[idx], + side='right')-1, + 0,len(dimN_bins[i])-2) return dimN_indices class jersf_lookup(lookup_base): + """ + This class defines a lookup table for jet energy resolution scale factors. 
+ The uncertainty values can be looked up with a call as follows: + jersf_lut = jersf_lookup() + SFs = jersf_lut(JetProperty1=jet.property1,...) + "SFs" will be of the same shape as the input jet properties. + The list of required jet properties are given in jersf_lut.signature + """ def __init__(self,formula,bins_and_orders,clamps_and_vars,parms_and_orders): + """ + The constructor takes the output of the "convert_jersf_txt_file" + text file converter, which returns a formula, bins, and values. + """ super(jersf_lookup,self).__init__() self._dim_order = bins_and_orders[1] self._bins = bins_and_orders[0] @@ -53,13 +61,16 @@ def __init__(self,formula,bins_and_orders,clamps_and_vars,parms_and_orders): self._eval_args[argname] = self._dim_args[argname] def _evaluate(self,*args): + """ SFs = f(args) """ bin_vals = {argname:args[self._dim_args[argname]] for argname in self._dim_order} eval_vals = {argname:args[self._eval_args[argname]] for argname in self._eval_vars} #lookup the bins that we care about dim1_name = self._dim_order[0] - dim1_indices = np.searchsorted(self._bins[dim1_name],bin_vals[dim1_name],side='right') - dim1_indices = np.clip(dim1_indices-1,0,self._bins[dim1_name].size-1) + dim1_indices = np.clip(np.searchsorted(self._bins[dim1_name], + bin_vals[dim1_name], + side='right')-1, + 0,self._bins[dim1_name].size-2) bin_indices = [dim1_indices] for binname in self._dim_order[1:]: bin_indices.append(masked_bin_eval(bin_indices[0],self._bins[binname], @@ -78,6 +89,11 @@ def _evaluate(self,*args): return parm_values + @property + def signature(self): + """ the list of all needed jet properties to be passed as kwargs to this lookup """ + return self._signature + def __repr__(self): out = 'binned dims : %s\n'%(self._dim_order) out += 'eval vars : %s\n'%(self._eval_vars) diff --git a/fnal_column_analysis_tools/lookup_tools/jme_standard_function.py b/fnal_column_analysis_tools/lookup_tools/jme_standard_function.py index ba774655f..69762c8df 100644 --- 
def wrap_formula(fstr, varlist):
    """
    Convert a JME formula string to a python function of the variables in
    varlist. A purely numeric formula becomes a constant function that
    broadcasts against its first argument.
    Supports only simple math for now.
    """
    try:
        # constant formula -> broadcast the constant over the first input
        const = float(fstr)
        fstr = 'np.full_like(%s,%f)' % (varlist[0], const)
    except ValueError:
        # not a bare number: use the formula text as-is
        pass
    # NOTE: eval of the JME text payload; formulas come from trusted
    # correction files, not end-user input
    return eval("lambda %s: %s" % (",".join(varlist), fstr))
#idx_in is a tuple of indices in increasing jaggedness
#idx_out is a list of flat indices
def flatten_idxs(idx_in, jaggedarray):
    """
    This provides a faster way to convert between tuples of jagged indices
    and flat indices into a jagged array's contents.
    Only one or two binning dimensions are supported; raises if a computed
    index falls outside the array contents.
    """
    if len(idx_in) == 0:
        # FIX: np.int was a deprecated alias of builtin int (removed in NumPy 1.24)
        return np.array([], dtype=int)
    idx_out = jaggedarray.starts[idx_in[0]]
    if len(idx_in) == 1:
        pass
    elif len(idx_in) == 2:
        idx_out = idx_out + idx_in[1]
    else:
        raise Exception('jme_standard_function only works for two binning dimensions!')

    good_idx = (idx_out < jaggedarray.content.size)
    if (~good_idx).any():
        # report the offending flat and jagged indices for debugging
        input_idxs = tuple([idx_out[~good_idx]] +
                           [idx_in[i][~good_idx] for i in range(len(idx_in))])
        raise Exception('Calculated invalid index {} for'
                        ' array with length {}'.format(np.vstack(input_idxs),
                                                       jaggedarray.content.size))

    return idx_out
    def _evaluate(self,*args):
        """ jec/jer = f(args) -- evaluate the correction formula for each input jet """
        # split the positional args into binning variables and formula inputs
        bin_vals = {argname:args[self._dim_args[argname]] for argname in self._dim_order}
        eval_vals = {argname:args[self._eval_args[argname]] for argname in self._eval_vars}

        #lookup the bins that we care about
        dim1_name = self._dim_order[0]
        # clip to [0, nbins-2] so every value falls in a valid bin
        dim1_indices = np.clip(np.searchsorted(self._bins[dim1_name],
                                               bin_vals[dim1_name],
                                               side='right')-1,
                               0,self._bins[dim1_name].size-2)
        bin_indices = [dim1_indices]
        # later dimensions are binned within the first dimension's bin
        for binname in self._dim_order[1:]:
            bin_indices.append(masked_bin_eval(bin_indices[0],
                                               self._bins[binname],
                                               bin_vals[binname]))

        bin_tuple = tuple(bin_indices)

        #get clamp values and clip the inputs
        eval_values = []
        for eval_name in self._eval_vars:
            # a single stored clamp value applies globally; otherwise the
            # clamp is looked up per-bin via flattened indices
            clamp_mins = None
            if self._eval_clamp_mins[eval_name].content.size == 1:
                clamp_mins = self._eval_clamp_mins[eval_name].content[0]
            else:
                idxs = flatten_idxs(bin_tuple,self._eval_clamp_mins[eval_name])
                clamp_mins = self._eval_clamp_mins[eval_name].content[idxs]
                # NOTE(review): per-bin clamps appear to be possibly jagged
                # themselves -- collapse to a scalar or a flat array; confirm
                # against the txt converter's output layout
                if isinstance(clamp_mins,JaggedArray):
                    if clamp_mins.content.size == 1:
                        clamp_mins = clamp_mins.content[0]
                    else:
                        clamp_mins = clamp_mins.flatten()
            clamp_maxs = None
            if self._eval_clamp_maxs[eval_name].content.size == 1:
                clamp_maxs = self._eval_clamp_maxs[eval_name].content[0]
            else:
                idxs = flatten_idxs(bin_tuple,self._eval_clamp_maxs[eval_name])
                clamp_maxs = self._eval_clamp_maxs[eval_name].content[idxs]
                if isinstance(clamp_maxs,JaggedArray):
                    if clamp_maxs.content.size == 1:
                        clamp_maxs = clamp_maxs.content[0]
                    else:
                        clamp_maxs = clamp_maxs.flatten()
            # clip each formula input into its allowed range before evaluation
            eval_values.append(np.clip(eval_vals[eval_name],clamp_mins,clamp_maxs))

        #get parameter values
        parm_values = []
        if len(self._parms) > 0:
            idxs = flatten_idxs(bin_tuple,self._parms[0])
            parm_values = [parm.content[idxs] for parm in self._parms]

        return self._formula(*tuple(parm_values+eval_values))
#need counts-1 since we only care about Nbins + jagged_counts = np.maximum(bins[bin_order[1]].counts-1,0) #need counts-1 since we only care about Nbins for i in range(nEvalVars): var_order.append(layout[i+offset_name]) if not interpolatedFunc: clamp_mins[layout[i+offset_name]] = JaggedArray.fromcounts(jagged_counts,np.atleast_1d(pars[columns[i+offset_col]])) clamp_maxs[layout[i+offset_name]] = JaggedArray.fromcounts(jagged_counts,np.atleast_1d(pars[columns[i+offset_col+1]])) offset_col += 1 - + + #now get the parameters, which we will look up with the clamped values parms = [] parm_order = [] @@ -198,7 +203,78 @@ def convert_junc_txt_file(juncFilePath): knots = np.array([np.unique(knot.flatten())[0] for knot in knots]) vallist[2] = ({'knots':knots,'ups':ups.T,'downs':downs.T},vallist[2][-1]) vallist = vallist[:-1] - #for down in downs: print down - #for up in ups: print up wrapped_up[newkey] = tuple(vallist) return wrapped_up + +def convert_effective_area_file(eaFilePath): + ea_f = open(eaFilePath,'r') + layoutstr = ea_f.readline().strip().strip('{}') + ea_f.close() + + name = eaFilePath.split('/')[-1].split('.')[0] + + layout = layoutstr.split() + if not layout[0].isdigit(): + raise Exception('First column of Effective Area File Header must be a digit!') + + #setup the file format + nBinnedVars = int(layout[0]) + nBinColumns = 2*nBinnedVars + nEvalVars = int(layout[nBinnedVars+1]) + + minMax = ['Min','Max'] + columns = [] + dtypes = [] + offset=1 + for i in range(nBinnedVars): + columns.extend(['%s%s'%(layout[i+offset],mm) for mm in minMax]) + dtypes.extend(['