Merge pull request #90 from RNO-G/more-tests
More tests
fschlueter authored Nov 14, 2024
2 parents 2c0c5b6 + 3154e28 commit 10b8628
Showing 7 changed files with 208 additions and 51 deletions.
44 changes: 34 additions & 10 deletions .github/workflows/install.yaml
@@ -29,22 +29,30 @@ jobs:
uses: actions/cache@v4
with:
path: tests/data
key: data
key: test-data
- name: Download sample file
if: steps.cache-sample-file.outputs.cache-hit != 'true'
run: |
wget https://rnog-data.zeuthen.desy.de/rnog_share/mattak_ci_data/station23_run325.root
wget https://rnog-data.zeuthen.desy.de/rnog_share/mattak_ci_data/volCalConsts_pol9_s11_1687936603-1687938078.root
mkdir -p tests/data/station23/run325
mv -v station23_run325.root tests/data/station23/run325/combined.root
mv -v volCalConsts_pol9_s11_1687936603-1687938078.root tests/data/station23/run325/
wget https://rnog-data.zeuthen.desy.de/rnog_share/mattak_ci/station23.tar.gz
mkdir -p tests/data
tar -xavf station23.tar.gz -C tests/data/
- name: Get previous benchmark results
uses: actions/cache/restore@v4
with:
path: tests/benchmark.json
key: benchmark-data
- name: Test installation (ROOTless)
run: |
pip install -v .
- name: Read a sample file (ROOTless)
run: |
python3 tests/test_dataset.py --station=0 --run=0 --data_dir=./tests/data/station23/run325 --backend uproot
python3 tests/test_dataset.py --station=0 --run=0 --data_dir=./tests/data/station23/run325 --backend uproot --calibrate -vc tests/data/station23/run325/volCalConsts_pol9_s11_1687936603-1687938078.root
python3 tests/test_dataset.py --station=0 --run=0 --data_dir=./tests/data/station23/run325 --backend uproot --benchmark ${{ github.sha }}
- name: Calibrate a sample file (ROOTless)
run: |
python3 tests/test_dataset.py --station=0 --run=0 --data_dir=./tests/data/station23/run325 --backend uproot --calibrate -vc tests/data/station23/run325/volCalConsts_pol9_s11_1687936603-1687938078.root --benchmark ${{ github.sha }}
- name: Read incomplete data from headers (ROOTless)
run: |
python3 tests/test_dataset.py --station=0 --run=0 --data_dir=./tests/data/station23/run325 --backend uproot --no-skip-incomplete --benchmark ${{ github.sha }}
- name: Cache ROOT
id: cache-root
uses: actions/cache@v4
@@ -67,8 +75,15 @@ jobs:
- name: Read a sample file (ROOT)
run: |
source root/bin/thisroot.sh
python3 tests/test_dataset.py --station=0 --run=0 --data_dir=./tests/data/station23/run325 --backend pyroot
python3 tests/test_dataset.py --station=0 --run=0 --data_dir=./tests/data/station23/run325 --backend pyroot --calibrate -vc tests/data/station23/run325/volCalConsts_pol9_s11_1687936603-1687938078.root
python3 tests/test_dataset.py --station=0 --run=0 --data_dir=./tests/data/station23/run325 --backend pyroot --benchmark ${{ github.sha }}
- name: Calibrate a sample file
run: |
source root/bin/thisroot.sh
python3 tests/test_dataset.py --station=0 --run=0 --data_dir=./tests/data/station23/run325 --backend pyroot --calibrate -vc tests/data/station23/run325/volCalConsts_pol9_s11_1687936603-1687938078.root --benchmark ${{ github.sha }}
- name: Read incomplete data from headers
run: |
source root/bin/thisroot.sh
python3 tests/test_dataset.py --station=0 --run=0 --data_dir=./tests/data/station23/run325 --backend pyroot --no-skip-incomplete --benchmark ${{ github.sha }}
- name: Read a sample file (all)
run: |
source root/bin/thisroot.sh
@@ -79,3 +94,12 @@ jobs:
source root/bin/thisroot.sh
python3 tests/compare_backends.py --station=0 --run=0 --data_dir tests/data/station23/run325/ -vc tests/data/station23/run325//volCalConsts_pol9_s11_1687936603-1687938078.root
python3 tests/compare_backends.py --station=0 --run=0 --data_dir tests/data/station23/run325/ -vc tests/data/station23/run325//volCalConsts_pol9_s11_1687936603-1687938078.root --calibrate
- name: Benchmark against previous results
run: |
python3 tests/evaluate_benchmarks.py --benchmark-file tests/benchmark.json ${{ github.sha }}
- name: Export benchmark results (only on main)
if: ${{ github.ref == 'refs/heads/main' }}
uses: actions/cache/save@v4
with:
key: benchmark-data
path: tests/benchmark.json
14 changes: 4 additions & 10 deletions py/mattak/backends/pyroot/dataset.py
@@ -140,14 +140,8 @@ def _eventInfo(self, i : int) -> Optional[mattak.Dataset.EventInfo]:
radiantThrs = numpy.array(daq_status.radiant_thresholds)
lowTrigThrs = numpy.array(daq_status.lt_trigger_thresholds)

# the default value for the sampling rate (3.2 GHz), used for data
# which does not contain this information in the waveform files,
# is set in the header file Waveforms.h
try:
sampleRate = self.ds.raw().radiant_sampling_rate / 1000
except ReferenceError:
# Fall back to runinfo (as in uproot backend)
sampleRate = self.ds.info().radiant_sample_rate / 1000
# now use Dataset's faster sample rate getter
sampleRate = self.ds.radiantSampleRate() / 1000

hdr = self.ds.header()

@@ -174,7 +168,7 @@ def _eventInfo(self, i : int) -> Optional[mattak.Dataset.EventInfo]:
dtype='uint8', count=self.NUM_CHANNELS * 2).reshape(self.NUM_CHANNELS, 2))

readout_delay = numpy.copy(numpy.around(numpy.frombuffer(
cppyy.ll.cast['float*'](self.ds.raw().digitizer_readout_delay_ns),
cppyy.ll.reinterpret_cast['float*'](self.ds.radiantReadoutDelays()),
dtype = numpy.float32, count=self.NUM_CHANNELS)))

return mattak.Dataset.EventInfo(
Expand All @@ -191,7 +185,7 @@ def _eventInfo(self, i : int) -> Optional[mattak.Dataset.EventInfo]:
sampleRate=sampleRate,
radiantThrs=radiantThrs,
lowTrigThrs=lowTrigThrs,
hasWaveforms=not isNully(self.ds.raw()),
hasWaveforms=self.ds.rawAvailable(),
readoutDelay=readout_delay)


17 changes: 9 additions & 8 deletions py/mattak/backends/uproot/dataset.py
@@ -269,14 +269,15 @@ def __init__(self, station : int, run : int, data_path : str, verbose : bool = F


def eventInfo(self, override_skip_incomplete : Optional[bool] = None) -> Union[Optional[mattak.Dataset.EventInfo], Sequence[Optional[mattak.Dataset.EventInfo]]]:
kw = dict(entry_start = self.first, entry_stop = self.last)
kw = dict(entry_start = self.first, entry_stop = self.last, library='np')

station = self._hds['station_number'].array(**kw)
run = self._hds['run_number'].array(**kw)
eventNumber = self._hds['event_number'].array(**kw)
readoutTime = self._hds['readout_time'].array(**kw)
triggerTime = self._hds['trigger_time'].array(**kw)
triggerInfo = self._hds['trigger_info'].array(**kw)
# trigger_info is a nested branch, so we read it with awkward instead of numpy
triggerInfo = self._hds['trigger_info'].array(entry_start=self.first, entry_stop=self.last)
pps = self._hds['pps_num'].array(**kw)
sysclk = self._hds['sysclk'].array(**kw)
sysclk_lastpps = self._hds['sysclk_last_pps'].array(**kw)
@@ -320,23 +321,23 @@ def eventInfo(self, override_skip_incomplete : Optional[bool] = None) -> Union[O

infos = []
info = None # stays None if the range below is empty
for i in range(self.last - self.first):
for i, t_info in zip(range(self.last - self.first), triggerInfo):

if override_skip_incomplete is not None and override_skip_incomplete:
if eventNumber[i] not in self.events_with_waveforms.keys():
continue

triggerType = "UNKNOWN"
if triggerInfo[i]['trigger_info.radiant_trigger']:
which = triggerInfo[i]['trigger_info.which_radiant_trigger']
if t_info['trigger_info.radiant_trigger']:
which = t_info['trigger_info.which_radiant_trigger']
if which == -1:
which = "X"
triggerType = "RADIANT" + str(which)
elif triggerInfo[i]['trigger_info.lt_trigger']:
elif t_info['trigger_info.lt_trigger']:
triggerType = "LT"
elif triggerInfo[i]['trigger_info.force_trigger']:
elif t_info['trigger_info.force_trigger']:
triggerType = "FORCE"
elif triggerInfo[i]['trigger_info.pps_trigger']:
elif t_info['trigger_info.pps_trigger']:
triggerType = "PPS"

radiantThrs = None
85 changes: 69 additions & 16 deletions src/Dataset.cc
@@ -4,6 +4,8 @@
#include "TPluginManager.h"
#include <iostream>



template <typename D>
static void clear(mattak::Dataset::field<D> * field)
{
@@ -18,9 +20,9 @@ static void clear(mattak::Dataset::field<D> * field)


template <typename D>
static void clear(mattak::Dataset::tree_field<D> * field)
static void clear(mattak::Dataset::tree_field<D> * field, bool borrowed = false)
{
if (field->file)
if (!borrowed && field->file)
{
delete field->file;
}
@@ -50,6 +52,31 @@ static void clear(mattak::Dataset::file_field<D> * field)
#define BITBUCKET "/dev/null"
#endif

void mattak::Dataset::setupRadiantMeta()
{

// HACK: separately make the sample rate from the waveform file
clear(&sample_rate,true);
clear(&delays,true);
sample_rate.branch = wf.tree->GetBranch("radiant_sampling_rate");
if (sample_rate.branch)
{
sample_rate.ptr = new uint32_t; //ugh
sample_rate.file = wf.file;
sample_rate.tree = wf.tree;
sample_rate.branch->SetAddress(sample_rate.ptr);
}

delays.branch = wf.tree->GetBranch("digitizer_readout_delay_ns");
if (delays.branch)
{
delays.ptr = new std::array<float, mattak::k::num_radiant_channels> {};
delays.file = wf.file;
delays.tree = wf.tree;
delays.branch->SetAddress(delays.ptr);
}
}

/** Silently check if file exists, supporting all protocols ROOT does */
static TFile * silentlyTryToOpen(const char * uri, const char * opt = "" )
{
@@ -126,6 +153,8 @@ static int setup(mattak::Dataset::file_field<D> * field, const char * filename,
void mattak::Dataset::unload()
{
clear(&wf);
clear(&sample_rate,true);
clear(&delays,true);
clear(&hd);
clear(&ds);
clear(&pd);
@@ -270,6 +299,9 @@ int mattak::Dataset::loadCombinedFile(const char * f)
std::cerr << "Could not load waveforms and headers from " << f << std::endl;
return -1;
}

setupRadiantMeta();

if (opt.verbose) std::cout << "Found waveforms and headers in " << f << std::endl;

// Try some optionalish things
@@ -344,6 +376,8 @@ int mattak::Dataset::loadDir(const char * dir)
}
}

setupRadiantMeta();

//now load the header files
if (opt.verbose) std::cout << "about to load headers " << std::endl;
if (setup(&hd, Form("%s/%s.root", dir, (full_dataset || !opt.partial_skip_incomplete) ? "headers" : partial_file), header_tree_names))
@@ -427,34 +461,53 @@ mattak::Header* mattak::Dataset::header(bool force)
return hd.ptr;
}

mattak::Waveforms* mattak::Dataset::raw(bool force)
template <typename T>
static void findIncompleteEntry(mattak::Dataset::tree_field<T> * field, mattak::Dataset * d, bool force)
{
if (force || wf.loaded_entry != current_entry)
if (force || field->loaded_entry != d->currentEntry())
{
if (wf.tree == nullptr) return nullptr;

if (full_dataset || opt.partial_skip_incomplete)
if (d->isFullDataset() || d->getOpt().partial_skip_incomplete)
{
wf.branch->GetEntry(current_entry);
field->branch->GetEntry(d->currentEntry());
}
else
{
int wf_entry = wf.tree->GetEntryNumberWithIndex(header(force)->event_number);
if (wf_entry < 0)
int entry = field->tree ? field->tree->GetEntryNumberWithIndex(d->header(force)->event_number) : -1;
if (entry < 0)
{
wf.missing_entry = true;
field->missing_entry = true;
}
else
{
wf.missing_entry = false;
wf.branch->GetEntry(wf_entry);
field->missing_entry = false;
field->branch->GetEntry(entry);
}

}

wf.loaded_entry = current_entry;
}
}


float mattak::Dataset::radiantSampleRate(bool force)
{
if (sample_rate.ptr == nullptr) return (info() && info()->radiant_sample_rate) ? info()->radiant_sample_rate : 3200;
findIncompleteEntry(&sample_rate,this, force);
return sample_rate.missing_entry ? 3200 : ((float) *sample_rate.ptr);
}

static std::array<float, mattak::k::num_radiant_channels> zeros{};

const float * mattak::Dataset::radiantReadoutDelays(bool force)
{
if (delays.ptr == nullptr) return (const float*) &zeros;
findIncompleteEntry(&delays,this, force);
return (const float*) ( delays.missing_entry ? &zeros : delays.ptr );
}


mattak::Waveforms* mattak::Dataset::raw(bool force)
{
if (wf.tree == nullptr) return nullptr;
findIncompleteEntry(&wf, this, force);
return wf.missing_entry ? nullptr: wf.ptr;
}

24 changes: 22 additions & 2 deletions src/mattak/Dataset.h
@@ -71,6 +71,7 @@ namespace mattak
int loadDir(const char * dir);
int loadCombinedFile(const char * file);

int currentEntry() const { return current_entry; }

/**
* Deprecated, kept for ABI compatibility
@@ -87,6 +88,21 @@

mattak::Header * header(bool force_reload = false);
mattak::Waveforms * raw(bool force_reload = false);

// Is the raw data available for currentEntry()? (mostly used by the PyROOT backend)
bool rawAvailable(bool force_reload = false)
{
return wf.tree &&
( full_dataset || opt.partial_skip_incomplete ||
wf.tree->GetEntryNumberWithIndex(header(force_reload)->event_number) >=0);
}

// these methods are useful if you want to read waveform metadata without reading the waveforms
// if you are reading the waveforms, they are less efficient than getting what you want from raw
float radiantSampleRate(bool force_reload = false);
const float * radiantReadoutDelays(bool force_reload = false); //size is mattak::k::num_radiant_channels; returns a float* since cppyy doesn't seem to be able to deal with std::array properly


mattak::CalibratedWaveforms * calibrated(bool force_reload = false); //will be nullptr if no calibration is passed
mattak::DAQStatus * status(bool force_reload = false);
mattak::RunInfo * info() const { return runinfo.ptr; }
@@ -129,18 +145,22 @@ namespace mattak
static const char ** getDAQStatusTreeNames();
static const char ** getPedestalTreeNames();
private:


tree_field<Waveforms> wf;
tree_field<Header> hd;
tree_field<DAQStatus> ds;
tree_field<Pedestals> pd;
file_field<RunInfo> runinfo;

tree_field<uint32_t> sample_rate;
tree_field<std::array<float,mattak::k::num_radiant_channels>> delays;
void setupRadiantMeta();

field<CalibratedWaveforms> calib_wf;

void unload();
int current_entry = 0;


bool full_dataset ;
DatasetOptions opt;

34 changes: 34 additions & 0 deletions tests/evaluate_benchmarks.py
@@ -0,0 +1,34 @@
import argparse
import json
import numpy as np

argparser = argparse.ArgumentParser()
argparser.add_argument('--benchmark-file', default='benchmark.json', help='Path to json file with benchmark data')
argparser.add_argument('--threshold', type=float, default=1.5, help='Maximum allowed ratio to the median of previous benchmarks; the script exits nonzero if exceeded.')
argparser.add_argument('tag', type=str, help='Tag of benchmark to test against other benchmarks')

args = argparser.parse_args()

with open(args.benchmark_file, 'r') as f:
benchmarks = json.load(f)

test_benchmark = benchmarks.pop(args.tag)
exit_code = 0
for key, test_value in test_benchmark.items():
old_values = []
for run_tag, run in benchmarks.items():
if key in run:
old_values.append(run[key])

if not len(old_values):
print(f'Skipping key: {key} with no previous benchmark data.')
continue

reference = np.median(old_values)
print(f'{key:20s} : {test_value*1e3:-7.3f} ms / {reference*1e3:-7.3f} ({test_value/reference*100:-4.0f} %)')
exit_code += test_value/reference > args.threshold

if exit_code:
print(f"!!! {exit_code} benchmark tests have failed !!!")

exit(exit_code)