From 342535385d8ac90f9af8e6c6563ab29abf8e2442 Mon Sep 17 00:00:00 2001 From: Yi-Mu Chen Date: Fri, 27 Oct 2023 12:47:42 +0200 Subject: [PATCH 01/12] Added reader and help functions to convert scikit-hep histogram to Tables --- examples/reading_scikihep_histograms.ipynb | 374 +++++++++++++++++++++ hepdata_lib/hist_utils.py | 213 ++++++++++++ tests/test_histread.py | 54 +++ 3 files changed, 641 insertions(+) create mode 100644 examples/reading_scikihep_histograms.ipynb create mode 100644 hepdata_lib/hist_utils.py create mode 100644 tests/test_histread.py diff --git a/examples/reading_scikihep_histograms.ipynb b/examples/reading_scikihep_histograms.ipynb new file mode 100644 index 00000000..d559eb30 --- /dev/null +++ b/examples/reading_scikihep_histograms.ipynb @@ -0,0 +1,374 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Reading histograms\n", + "\n", + "For the new python-based frameworks, another common task would be to translate\n", + "histogram in the [`scikit-hep.hist`](https://hist.readthedocs.io/en/latest/)\n", + "package into the HEPData format. The functions in the `hepdata_lib` will help\n", + "you with that, and this notebook will demonstrate how to do that.\n", + "\n", + "As explained in the [Getting started notebook](Getting_started.ipynb), a\n", + "`Submission` needs to exist or be created. Here, we'll just create one without\n", + "any additional information:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Welcome to JupyROOT 6.28/04\n" + ] + } + ], + "source": [ + "from hepdata_lib import Submission\n", + "\n", + "submission = Submission()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The common use-case `scikit-hep` histograms is to allow for after-the-fact\n", + "slicing and grouping from a main histogram. 
Let us first generate a fake\n", + "histogram that may appear in common histograms, as well as a common slicing\n", + "routine" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Hist(\n", + " StrCategory(['data', 'QCD', 'ttbar'], name='dataset'),\n", + " IntCategory([-1, 0, 4, 5], name='flavor'),\n", + " Regular(60, -3, 3, name='eta'),\n", + " Regular(20, 0, 500, name='pt'),\n", + " storage=Weight()) # Sum: WeightedSum(value=221221, variance=123802) (WeightedSum(value=256973, variance=143935) with flow)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import hist\n", + "import numpy as np\n", + "\n", + "rng = np.random.default_rng(seed=123_456_789)\n", + "\n", + "h = hist.Hist(\n", + " hist.axis.StrCategory([\"data\", \"QCD\", \"ttbar\"], name=\"dataset\"),\n", + " hist.axis.IntCategory([-1, 0, 4, 5], name=\"flavor\"),\n", + " hist.axis.Regular(60, -3, +3, name=\"eta\"),\n", + " hist.axis.Regular(20, 0, 500, name=\"pt\"),\n", + " storage=hist.storage.Weight(),\n", + ")\n", + "\n", + "h.fill( ## For mock data\n", + " dataset=\"data\",\n", + " flavor=-1,\n", + " eta=rng.normal(0, 2.0, size=123_456),\n", + " pt=rng.exponential(100, size=123_456),\n", + ")\n", + "h.fill( ## For Mock QCD\n", + " dataset=\"QCD\",\n", + " flavor=rng.choice([0, 4, 5], size=1_000_000, p=[0.8, 0.15, 0.05]),\n", + " eta=rng.normal(0.0, 2.0, size=1_000_000),\n", + " pt=rng.exponential(100, size=1_000_000),\n", + " weight=0.123456 * 2 * rng.random(size=1_000_000),\n", + ")\n", + "h.fill( ## For mock ttbar\n", + " dataset=\"ttbar\",\n", + " flavor=rng.choice([0, 4, 5], size=1_000_000, p=[0.45, 0.1, 0.45]),\n", + " eta=rng.normal(0.0, 1.5, size=1_000_000),\n", + " pt=rng.exponential(200, size=1_000_000),\n", + " weight=0.01 * 2 * rng.random(size=1_000_000),\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example of 
manual processing to 1D array\n", + "\n", + "Let use create a simple slicing routine to get the various histograms of\n", + "interest, then use the most versatile function, the\n", + "`hepdata_lib.hist_utils.read_hist` method, to create arrays that will be\n", + "compatible with variable creation." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'pt': array([( 0., 25.), ( 25., 50.), ( 50., 75.), ( 75., 100.),\n", + " (100., 125.), (125., 150.), (150., 175.), (175., 200.),\n", + " (200., 225.), (225., 250.), (250., 275.), (275., 300.),\n", + " (300., 325.), (325., 350.), (350., 375.), (375., 400.),\n", + " (400., 425.), (425., 450.), (450., 475.), (475., 500.)],\n", + " dtype=[('f0', ' Dict[str, numpy.ndarray]: + """ + Converting the scikit-hep histogram in to a dictionary of numpy arrays that + can be used for hepdata_lib Variable and Uncertainty declaration. + + For all axes define in the histogram, a `hepdata_lib.Variable` with + `is_independent=True` should be declared. The `values` of this variable will + be stored in the return dictionary following the axes names. + + Overflow and underflow bin will be handled using a single flag for all axes, + so be sure to declare/modify histogram axes properties according to your + needs. + + The storage content will be returned as is, so additional uncertainty + processing will need to be handled by the user using the return values. 
+ """ + axes_entries = [_get_histaxis_array(ax, flow=flow) for ax in h.axes] + axes_entries = numpy.meshgrid(*axes_entries) + + ## Getting axes return values + readout = {ax.name: axes_entries[idx].flatten() for idx, ax in enumerate(h.axes)} + + ## Getting the histogram return values + view = h.view(flow=flow) + + _storage_keys = { + hist.storage.Weight: ["value", "variance"], + hist.storage.Mean: ["value", ""], + } + + if h.storage_type is hist.storage.Int64 or h.storage_type is hist.storage.Double: + readout["hist_value"] = view.flatten() + elif h.storage_type in _storage_keys: + for key in _storage_keys[h.storage_type]: + readout["hist_" + key] = view[key].flatten() + else: + raise NotImplementedError( + f"Storage type {h.storage_type} currently not implemented" + ) + + return readout + + +def _get_histaxis_array(axis, flow: bool) -> numpy.ndarray: + """ + Given an axis array, return the bin entries and a numpy array. + + For continuous axes, the return will be a Nx2 array of bin edge pairs. For + categorical axes, the return will be a N array of bin content values. If the + flow is set to true, the function will also add the overflow/underflow bins + according to the settings found in axis.traits. For categorical axis, this + will include an extra `__UNDETERMINED__` entry or a +1 entry. 
+ """ + + ## Getting the entries as a simple list + entries = [x for x in axis] + + ## Adding overflow bin + if flow and axis.traits.overflow: + if isinstance(axis, hist.axis.StrCategory): + entries.append("__UNDETERMINED__") + elif isinstance(axis, hist.axis.IntCategory): + entries.append(entries[-1] + 1) + else: + entries.append((axis.edges[-1], numpy.inf)) + + ## Adding underflow bin + if flow and axis.traits.underflow: + entries = [(-numpy.inf, axis.edges[0])] + entries + + ## Converting to numpy array + if axis.traits.continuous: + return numpy.array(entries, dtype="f,f") + else: + return numpy.array(entries) + + +def hist_as_variable( + var_name, + h: hist.Hist, + flow: bool = False, + uncertainty: Optional[Dict[str, Union[str, hist.Hist, numpy.ndarray]]] = None, + **kwargs, +) -> Variable: + """ + Returning this histogram entries as a Variable object, with a simpler + interface for modifying uncertainty + """ + if uncertainty is None: + uncertainty = {} + + readout = read_hist(h, flow=flow) + var = Variable( + var_name, + is_independent=False, + is_binned=False, + **kwargs, + ) + var.values = readout["hist_value"] + + def _make_unc_array(x): + if isinstance(x, float): + return numpy.ones_like(readout["hist_value"]) * x + elif isinstance(x, numpy.ndarray): + return x + elif isinstance(x, hist.Hist): + return read_hist(x, flow=flow)["hist_value"] + else: + raise NotImplementedError(f"Unknown uncertainty format! 
{type(x)}") + + for unc_name, unc_proc in uncertainty.items(): + is_symmetric = None + if isinstance(unc_proc, str): + if unc_proc == "poisson_sym": # Symmetric poisson uncertainty + is_symmetric = True + arr = _make_poisson_unc_array(readout, is_symmetric) + elif unc_proc == "poisson_asym": # Asymmetric poisson uncertainty + is_symmetric = False + arr = _make_poisson_unc_array(readout, is_symmetric) + else: + raise NotImplementedError(f"Unknown uncertainty process {unc_proc}") + elif isinstance(unc_proc, tuple): # Asymmetric uncertainty + is_symmetric = False # + assert len(unc_proc) == 2, "Asymmetric uncertainty can only have 2 entries" + lo, up = _make_unc_array(unc_proc[0]), _make_unc_array(unc_proc[1]) + arr = [x for x in zip(lo, up)] + else: # Assuming symmetric error + is_symmetric = True + arr = _make_unc_array(unc_proc) + + assert len(arr) == len(readout["hist_value"]), "Mismatch array formats" + + unc_var = Uncertainty(unc_name, is_symmetric=is_symmetric) + unc_var.values = arr + var.add_uncertainty(unc_var) + + return var + + +def _make_poisson_unc_array( + readout: Dict[str, numpy.ndarray], symmetric: bool +) -> numpy.ndarray: + if symmetric: + if "hist_variance" not in readout.keys(): + numpy.sqrt(readout["hist_value"]) + else: # Effective number of events + sw, sw2 = readout["hist_value"], readout["hist_variance"] + n_events = numpy.divide( + sw**2, sw2, out=numpy.zeros_like(sw), where=(sw2 != 0) + ) + rel_unc = numpy.divide( + numpy.sqrt(n_events), + n_events, + out=numpy.zeros_like(sw), + where=(n_events != 0), + ) + return sw * rel_unc + return numpy.sqrt(n_events) + else: + sw, sw2 = readout["hist_value"], readout["hist_value"] + if "hist_variance" in readout.keys(): + sw2 = readout["hist_variance"] + lo, up = hist.intervals.poisson_interval(sw, sw2) + lo, up = lo - sw, up - sw + return [x for x in zip(lo, up)] + + +def create_hist_base_table( + table_name: str, + h: hist.Hist, + flow: bool = False, + axes_rename: Optional[Dict[str, str]] = None, 
# Additional axes proces + axes_units: Optional[Dict[str, str]] = None, # Additional axes proces +) -> Table: + """ + Preparing the table based on hist, allows for the additional exclusion of + axes via a list of string names + """ + if axes_rename is None: + axes_rename = {} + if axes_units is None: + axes_units = {} + table = Table(table_name) + + readout = read_hist(h) + + for ax in h.axes: + var_name = ax.name + if ax.name in axes_rename: + var_name = axes_rename[ax.name] + elif ax.label: + var_name = ax.label + var = Variable( + var_name, + is_independent=True, + is_binned=ax.traits.continuous, + units=axes_units.get(ax.name, ""), + ) + var.values = readout[ax.name] + table.add_variable(var) + + return table diff --git a/tests/test_histread.py b/tests/test_histread.py new file mode 100644 index 00000000..2e5e49a0 --- /dev/null +++ b/tests/test_histread.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python +"""Test scikit-hep reading""" +from unittest import TestCase +import numpy as np +import hist +from hepdata_lib.hist_utils import * + + +class TestHistUtils(TestCase): + """Test the hist_utils functions.""" + + base_hist = hist.Hist( + hist.axis.StrCategory(["data", "QCD", "ttbar"], name="dataset"), + hist.axis.IntCategory([-1, 0, 4, 5], name="flavor"), + hist.axis.Regular(60, -3, +3, name="eta"), + hist.axis.Regular(50, 0, 500, name="pt"), + storage=hist.storage.Weight(), + ) + rng = np.random.default_rng(seed=123_456_789) + + base_hist.fill( ## For mock data + dataset="data", + flavor=-1, + eta=rng.normal(0, 2.0, size=123_456), + pt=rng.exponential(100, size=123_456), + ) + base_hist.fill( ## For Mock QCD + dataset="QCD", + flavor=rng.choice([0, 4, 5], size=1_000_000, p=[0.8, 0.15, 0.05]), + eta=rng.normal(0.0, 2.0, size=1_000_000), + pt=rng.exponential(100, size=1_000_000), + weight=0.123456 * 2 * rng.random(size=1_000_000), + ) + base_hist.fill( ## For mock ttbar + dataset="ttbar", + flavor=rng.choice([0, 4, 5], size=1_000_000, p=[0.45, 0.1, 0.45]), + 
eta=rng.normal(0.0, 1.5, size=1_000_000), + pt=rng.exponential(200, size=1_000_000), + weight=0.01 * 2 * rng.random(size=1_000_000), + ) + + def test_default_read(self): + try: + readout = read_hist(TestHistUtils.base_hist) + except: + self.fail("Histogram reading raised an unexpected exception.") + + # Checking dimension compatibility + self.assertTrue(len(readout["dataset"]) == len(readout["flavor"])) + self.assertTrue(len(readout["dataset"]) == len(readout["eta"])) + self.assertTrue(len(readout["dataset"]) == len(readout["pt"])) + + # Clean up + self.doCleanups() From b6a0d9aa415941cf3105dc81a02b65d964cd6df8 Mon Sep 17 00:00:00 2001 From: Yi-Mu Chen Date: Fri, 27 Oct 2023 16:21:28 +0200 Subject: [PATCH 02/12] Updated documentation and dependencies --- README.md | 3 + examples/reading_scikihep_histograms.ipynb | 79 ++++++++++++---------- hepdata_lib/hist_utils.py | 42 ++++++++++-- requirements.txt | 1 + 4 files changed, 82 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index ce20b896..7fcac52c 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,9 @@ There are a few more examples available that can directly be run using the [bind - [Reading TGraph and TGraphError from '.C' files](https://github.com/HEPData/hepdata_lib/blob/main/examples/read_c_file.ipynb) [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/HEPData/hepdata_lib/main?filepath=examples/read_c_file.ipynb)

+- [Preparing scikit-hep histograms](https://github.com/HEPData/hepdata_lib/blob/main/examples/reading_scikihep_histograms.ipynb)
+[![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/HEPData/hepdata_lib/main?filepath=examples/reading_scikihep_histograms.ipynb)
+

## External dependencies diff --git a/examples/reading_scikihep_histograms.ipynb b/examples/reading_scikihep_histograms.ipynb index d559eb30..7334b5cd 100644 --- a/examples/reading_scikihep_histograms.ipynb +++ b/examples/reading_scikihep_histograms.ipynb @@ -6,14 +6,15 @@ "source": [ "# Reading histograms\n", "\n", - "For the new python-based frameworks, another common task would be to translate\n", - "histogram in the [`scikit-hep.hist`](https://hist.readthedocs.io/en/latest/)\n", - "package into the HEPData format. The functions in the `hepdata_lib` will help\n", - "you with that, and this notebook will demonstrate how to do that.\n", + "For the new python-based frameworks, another common format would needs\n", + "translation are histogram in the\n", + "[`scikit-hep.hist`](https://hist.readthedocs.io/en/latest/). The functions in\n", + "the `hepdata_lib.hist_utils` will help you with that, and this notebook will\n", + "demonstrate how to do that.\n", "\n", "As explained in the [Getting started notebook](Getting_started.ipynb), a\n", "`Submission` needs to exist or be created. Here, we'll just create one without\n", - "any additional information:" + "any additional information.\n" ] }, { @@ -39,10 +40,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The common use-case `scikit-hep` histograms is to allow for after-the-fact\n", - "slicing and grouping from a main histogram. Let us first generate a fake\n", + "The common use-case for `scikit-hep` histograms is to allow for after-the-fact\n", + "slicing and grouping from a primary histogram. 
Let us first generate a fake\n", "histogram that may appear in common histograms, as well as a common slicing\n", - "routine" + "routine\n" ] }, { @@ -109,9 +110,9 @@ "## Example of manual processing to 1D array\n", "\n", "Let use create a simple slicing routine to get the various histograms of\n", - "interest, then use the most versatile function, the\n", + "interest, then use the most general function, the\n", "`hepdata_lib.hist_utils.read_hist` method, to create arrays that will be\n", - "compatible with variable creation." + "compatible with `hepdata_lib.Variable` declaration.\n" ] }, { @@ -123,27 +124,33 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'pt': array([( 0., 25.), ( 25., 50.), ( 50., 75.), ( 75., 100.),\n", + "{'hist_value': array([27405., 21382., 16585., 12740., 10069., 7878., 6007., 4678.,\n", + " 3666., 2903., 2333., 1734., 1352., 1048., 851., 634.,\n", + " 485., 401., 294., 230.]),\n", + " 'hist_variance': array([27405., 21382., 16585., 12740., 10069., 7878., 6007., 4678.,\n", + " 3666., 2903., 2333., 1734., 1352., 1048., 851., 634.,\n", + " 485., 401., 294., 230.]),\n", + " 'pt': array([( 0., 25.), ( 25., 50.), ( 50., 75.), ( 75., 100.),\n", " (100., 125.), (125., 150.), (150., 175.), (175., 200.),\n", " (200., 225.), (225., 250.), (250., 275.), (275., 300.),\n", " (300., 325.), (325., 350.), (350., 375.), (375., 400.),\n", " (400., 425.), (425., 450.), (450., 475.), (475., 500.)],\n", - " dtype=[('f0', ' numpy.ndarray: categorical axes, the return will be a N array of bin content values. If the flow is set to true, the function will also add the overflow/underflow bins according to the settings found in axis.traits. For categorical axis, this - will include an extra `__UNDETERMINED__` entry or a +1 entry. + will include an extra `"__UNDETERMINED__"` entry (for StrCategory) or an +1 + entry (for IntCategory). 
""" ## Getting the entries as a simple list @@ -95,7 +96,28 @@ def hist_as_variable( ) -> Variable: """ Returning this histogram entries as a Variable object, with a simpler - interface for modifying uncertainty + interface for automatically generating values uncertainties. + + The `h` and `flow` inputs are passed directly to the `read_hist` method to + extract the value to be used for the variable. + + The `uncertainty` is a dictionary defining how uncertainties should be + defined. Dictionary keys are used as the name of the uncertainty, while the + value defines how the uncertainty should be constructed. This can either be: + + - `str`: either "poisson_asym" or "poisson_sym", indicating to extract + Poisson uncertainty directly from the histogram values. (Either the + asymmetric Garwood interval defined by `hist.intervals` or a simply, + symmetric `sqrt(n)`.) + - `float`: A flat uncertainty to be used on all bins. + - `numpy.ndarray`: An array indicating the uncertainty for each bin. The + array should be compatible with the output of `read_hist['hist_values']` + - `hist.Hist`: The histogram with bin values indicating the uncertainty to + be used for each bin. The histogram should be compatible with the input + histogram. + - `tuple(T,T)` where `T` can either be a `float`, `numpy.ndarray` or + `hist.Hist`. This is used to indicate asymmetric uncertainties, following + the lower/upper ordering convention of hepdata_lib """ if uncertainty is None: uncertainty = {} @@ -151,9 +173,15 @@ def _make_unc_array(x): def _make_poisson_unc_array( readout: Dict[str, numpy.ndarray], symmetric: bool ) -> numpy.ndarray: + """ + Given the results of `read_hist`, extract the Poisson uncertainty using + hist.intervals. 
Automatically detecting the histogram storage type to handle + weighted uncertainties + """ if symmetric: if "hist_variance" not in readout.keys(): numpy.sqrt(readout["hist_value"]) + return numpy.sqrt(n_events) else: # Effective number of events sw, sw2 = readout["hist_value"], readout["hist_variance"] n_events = numpy.divide( @@ -166,7 +194,6 @@ def _make_poisson_unc_array( where=(n_events != 0), ) return sw * rel_unc - return numpy.sqrt(n_events) else: sw, sw2 = readout["hist_value"], readout["hist_value"] if "hist_variance" in readout.keys(): @@ -180,12 +207,13 @@ def create_hist_base_table( table_name: str, h: hist.Hist, flow: bool = False, - axes_rename: Optional[Dict[str, str]] = None, # Additional axes proces - axes_units: Optional[Dict[str, str]] = None, # Additional axes proces + axes_rename: Optional[Dict[str, str]] = None, + axes_units: Optional[Dict[str, str]] = None, ) -> Table: """ - Preparing the table based on hist, allows for the additional exclusion of - axes via a list of string names + Preparing the table based on hist. This constructs just the histogram axis + as the table variable. Histogram entries should be added via the + `hist_as_variable` method. 
""" if axes_rename is None: axes_rename = {} diff --git a/requirements.txt b/requirements.txt index eaf80f96..c1a8d716 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ numpy PyYAML>=4.0 future +hist hepdata-validator>=0.3.5 From 6ab8cb662db411f284a2d44e1756e1153c6a6edf Mon Sep 17 00:00:00 2001 From: Yi-Mu Chen Date: Mon, 30 Oct 2023 11:42:25 +0100 Subject: [PATCH 03/12] Fixing complaints from pylint --- hepdata_lib/hist_utils.py | 106 ++++++++++++++++++++------------------ 1 file changed, 57 insertions(+), 49 deletions(-) diff --git a/hepdata_lib/hist_utils.py b/hepdata_lib/hist_utils.py index f5a41d88..e8bbc447 100644 --- a/hepdata_lib/hist_utils.py +++ b/hepdata_lib/hist_utils.py @@ -1,15 +1,15 @@ """hepdata_lib utilities for interacting with scikit-hep hist histograms""" -from typing import Tuple, Optional, Union, List, Dict +from typing import Optional, Union, Dict import numpy -from hepdata_lib import Table, Variable, Uncertainty - # scikit-hep hist package import hist import hist.intervals +from hepdata_lib import Table, Variable, Uncertainty + -def read_hist(h: hist.Hist, flow: bool = False) -> Dict[str, numpy.ndarray]: +def read_hist(histo: hist.Hist, flow: bool = False) -> Dict[str, numpy.ndarray]: """ Converting the scikit-hep histogram in to a dictionary of numpy arrays that can be used for hepdata_lib Variable and Uncertainty declaration. @@ -25,28 +25,33 @@ def read_hist(h: hist.Hist, flow: bool = False) -> Dict[str, numpy.ndarray]: The storage content will be returned as is, so additional uncertainty processing will need to be handled by the user using the return values. 
""" - axes_entries = [_get_histaxis_array(ax, flow=flow) for ax in h.axes] + axes_entries = [_get_histaxis_array(ax, flow=flow) for ax in histo.axes] axes_entries = numpy.meshgrid(*axes_entries) ## Getting axes return values - readout = {ax.name: axes_entries[idx].flatten() for idx, ax in enumerate(h.axes)} + readout = { + ax.name: axes_entries[idx].flatten() for idx, ax in enumerate(histo.axes) + } ## Getting the histogram return values - view = h.view(flow=flow) + view = histo.view(flow=flow) _storage_keys = { hist.storage.Weight: ["value", "variance"], hist.storage.Mean: ["value", ""], } - if h.storage_type is hist.storage.Int64 or h.storage_type is hist.storage.Double: + if ( + histo.storage_type is hist.storage.Int64 + or histo.storage_type is hist.storage.Double + ): readout["hist_value"] = view.flatten() - elif h.storage_type in _storage_keys: - for key in _storage_keys[h.storage_type]: + elif histo.storage_type in _storage_keys: + for key in _storage_keys[histo.storage_type]: readout["hist_" + key] = view[key].flatten() else: raise NotImplementedError( - f"Storage type {h.storage_type} currently not implemented" + f"Storage type {histo.storage_type} currently not implemented" ) return readout @@ -65,7 +70,7 @@ def _get_histaxis_array(axis, flow: bool) -> numpy.ndarray: """ ## Getting the entries as a simple list - entries = [x for x in axis] + entries = list(axis) ## Adding overflow bin if flow and axis.traits.overflow: @@ -82,14 +87,16 @@ def _get_histaxis_array(axis, flow: bool) -> numpy.ndarray: ## Converting to numpy array if axis.traits.continuous: - return numpy.array(entries, dtype="f,f") + entries = numpy.array(entries, dtype="f,f") else: - return numpy.array(entries) + entries = numpy.array(entries) + + return entries def hist_as_variable( var_name, - h: hist.Hist, + histo: hist.Hist, flow: bool = False, uncertainty: Optional[Dict[str, Union[str, hist.Hist, numpy.ndarray]]] = None, **kwargs, @@ -122,7 +129,7 @@ def hist_as_variable( if uncertainty 
is None: uncertainty = {} - readout = read_hist(h, flow=flow) + readout = read_hist(histo, flow=flow) var = Variable( var_name, is_independent=False, @@ -131,15 +138,14 @@ def hist_as_variable( ) var.values = readout["hist_value"] - def _make_unc_array(x): - if isinstance(x, float): - return numpy.ones_like(readout["hist_value"]) * x - elif isinstance(x, numpy.ndarray): - return x - elif isinstance(x, hist.Hist): - return read_hist(x, flow=flow)["hist_value"] - else: - raise NotImplementedError(f"Unknown uncertainty format! {type(x)}") + def _make_unc_array(unc_val): + if isinstance(unc_val, float): + return numpy.ones_like(readout["hist_value"]) * unc_val + if isinstance(unc_val, numpy.ndarray): + return unc_val + if isinstance(unc_val, hist.Hist): + return read_hist(unc_val, flow=flow)["hist_value"] + raise NotImplementedError(f"Unknown uncertainty format! {type(unc_val)}") for unc_name, unc_proc in uncertainty.items(): is_symmetric = None @@ -155,8 +161,8 @@ def _make_unc_array(x): elif isinstance(unc_proc, tuple): # Asymmetric uncertainty is_symmetric = False # assert len(unc_proc) == 2, "Asymmetric uncertainty can only have 2 entries" - lo, up = _make_unc_array(unc_proc[0]), _make_unc_array(unc_proc[1]) - arr = [x for x in zip(lo, up)] + _lo, _up = _make_unc_array(unc_proc[0]), _make_unc_array(unc_proc[1]) + arr = list(zip(_lo, _up)) else: # Assuming symmetric error is_symmetric = True arr = _make_unc_array(unc_proc) @@ -180,32 +186,34 @@ def _make_poisson_unc_array( """ if symmetric: if "hist_variance" not in readout.keys(): - numpy.sqrt(readout["hist_value"]) - return numpy.sqrt(n_events) + n_events = numpy.sqrt(readout["hist_value"]) + unc_arr = numpy.sqrt(n_events) else: # Effective number of events - sw, sw2 = readout["hist_value"], readout["hist_variance"] + _sw, _sw2 = readout["hist_value"], readout["hist_variance"] n_events = numpy.divide( - sw**2, sw2, out=numpy.zeros_like(sw), where=(sw2 != 0) + _sw**2, _sw2, out=numpy.zeros_like(_sw), where=(_sw2 != 
0) ) rel_unc = numpy.divide( numpy.sqrt(n_events), n_events, - out=numpy.zeros_like(sw), + out=numpy.zeros_like(_sw), where=(n_events != 0), ) - return sw * rel_unc + unc_arr = _sw * rel_unc else: - sw, sw2 = readout["hist_value"], readout["hist_value"] + _sw, _sw2 = readout["hist_value"], readout["hist_value"] if "hist_variance" in readout.keys(): - sw2 = readout["hist_variance"] - lo, up = hist.intervals.poisson_interval(sw, sw2) - lo, up = lo - sw, up - sw - return [x for x in zip(lo, up)] + _sw2 = readout["hist_variance"] + _lo, _up = hist.intervals.poisson_interval(_sw, _sw2) + _lo, _up = _lo - _sw, _up - _sw + unc_arr = list(zip(_lo, _up)) + + return unc_arr def create_hist_base_table( table_name: str, - h: hist.Hist, + histo: hist.Hist, flow: bool = False, axes_rename: Optional[Dict[str, str]] = None, axes_units: Optional[Dict[str, str]] = None, @@ -221,21 +229,21 @@ def create_hist_base_table( axes_units = {} table = Table(table_name) - readout = read_hist(h) + readout = read_hist(histo, flow=flow) - for ax in h.axes: - var_name = ax.name - if ax.name in axes_rename: - var_name = axes_rename[ax.name] - elif ax.label: - var_name = ax.label + for axis in histo.axes: + var_name = axis.name + if axis.name in axes_rename: + var_name = axes_rename[axis.name] + elif axis.label: + var_name = axis.label var = Variable( var_name, is_independent=True, - is_binned=ax.traits.continuous, - units=axes_units.get(ax.name, ""), + is_binned=axis.traits.continuous, + units=axes_units.get(axis.name, ""), ) - var.values = readout[ax.name] + var.values = readout[axis.name] table.add_variable(var) return table From 7ddb4556bafc255b5726daea5efb8cc93f7c5074 Mon Sep 17 00:00:00 2001 From: Yi-Mu Chen Date: Mon, 30 Oct 2023 13:33:36 +0100 Subject: [PATCH 04/12] Added unit test for hist_utils functions --- tests/test_histread.py | 98 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/tests/test_histread.py b/tests/test_histread.py index 
2e5e49a0..2e6d1742 100644 --- a/tests/test_histread.py +++ b/tests/test_histread.py @@ -3,6 +3,7 @@ from unittest import TestCase import numpy as np import hist +import hist.intervals from hepdata_lib.hist_utils import * @@ -40,6 +41,7 @@ class TestHistUtils(TestCase): ) def test_default_read(self): + # Default read with no projection try: readout = read_hist(TestHistUtils.base_hist) except: @@ -50,5 +52,101 @@ def test_default_read(self): self.assertTrue(len(readout["dataset"]) == len(readout["eta"])) self.assertTrue(len(readout["dataset"]) == len(readout["pt"])) + def test_projection_read(self): + # Default read with simple projection + try: + r1 = read_hist(TestHistUtils.base_hist[{"dataset": "data", "flavor": sum}]) + r2 = read_hist(TestHistUtils.base_hist[{"dataset": "QCD", "flavor": 0j}]) + except: + self.fail("Histogram reading raised an unexpected exception.") + # Checking dimension compatibility + self.assertTrue(len(r1["eta"]) == len(r1["pt"])) + self.assertTrue(len(r1["eta"]) == len(r2["pt"])) + self.assertTrue(np.all(r1["eta"] == r2["eta"])) + self.assertTrue(np.all(r1["pt"] == r2["pt"])) + # Clean up self.doCleanups() + + def test_uncertainty_generation(self): + h = TestHistUtils.base_hist[{"dataset": "data"}] + q_h = TestHistUtils.base_hist[{"dataset": "QCD"}] + t_h = TestHistUtils.base_hist[{"dataset": "ttbar"}] + + h_arr = h.view(flow=True)["value"].flatten() + unc_arr = np.ones_like(h_arr) * np.random.random(size=h_arr.shape[0]) + try: + r = hist_as_variable( + "testing", + h, + flow=True, + uncertainty={ + "symmetric stat": "poisson_sym", + "asymmetric stat": "poisson_asym", + "symmetric float": 1.5, + "asymmetric float": (1.5, 2.2), + "symmetric array": unc_arr, + "asymmetric array": (-0.8 * unc_arr, unc_arr), + "symmetric histogram": q_h, + "asymmetric histogram": (q_h, t_h), + }, + ) + except: + self.fail("Unexpected exception of automatic uncertainty generation.") + + def check_val(a, b): + return self.assertTrue(np.all(np.isclose(a, b) | 
np.isnan(a) | np.isnan(b))) + + h_arr = h.view(flow=True)["value"].flatten() + check_val(r.values, h_arr) + # Symmetric Poisson + check_val(r.uncertainties[0].values, np.sqrt(h_arr)) + + # Asymmetric Poisson + l, u = hist.intervals.poisson_interval(h_arr) + l, u = l - h_arr, u - h_arr + check_val(r.uncertainties[1].values, list(zip(l, u))) + + # Flat uncertainties + check_val(r.uncertainties[2].values, 1.5) + check_val(r.uncertainties[3].values, (1.5, 2.2)) + + # Array defined uncertainties + check_val(r.uncertainties[4].values, unc_arr) + check_val(r.uncertainties[5].values, list(zip(-0.8 * unc_arr, unc_arr))) + + # Histogram defined uncertainties + q_arr = q_h.view(flow=True)["value"].flatten() + t_arr = t_h.view(flow=True)["value"].flatten() + check_val(r.uncertainties[6].values, q_arr) + check_val(r.uncertainties[7].values, list(zip(q_arr, t_arr))) + + self.doCleanups() + + def test_table_generation(self): + _rename_ = { + "dataset": "Data set", + "flavor": "Jet flavor", + "eta": "Jet $\eta$", + "pt": "Jet $p_{T}$", + } + _units_ = {"pt": "GeV"} + try: + table = create_hist_base_table( + "my_table", + TestHistUtils.base_hist, + axes_rename=_rename_, + axes_units=_units_, + ) + except: + self.fail("Unexpected exception of table generation.") + + readout = read_hist(TestHistUtils.base_hist) + + for idx, (name, new_name) in enumerate(_rename_.items()): + # Checking rename + self.assertTrue(table.variables[idx].name == new_name) + self.assertTrue(table.variables[idx].units == _units_.get(name, "")) + self.assertTrue(len(table.variables[idx].values) == len(readout[name])) + + self.doCleanups() From ab99f26ec475a8dcfb6c365caffeb05ea31af994 Mon Sep 17 00:00:00 2001 From: Yi-Mu Chen Date: Mon, 30 Oct 2023 13:38:38 +0100 Subject: [PATCH 05/12] Updating dependencies --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c1a8d716..8b7dc410 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 
+1,5 @@ numpy PyYAML>=4.0 future -hist +hist[plot] # Required for intervals hepdata-validator>=0.3.5 From 67a9ad47db173336fb9cfe35c4473c04488b2b92 Mon Sep 17 00:00:00 2001 From: Yi-Mu Chen Date: Mon, 30 Oct 2023 13:39:06 +0100 Subject: [PATCH 06/12] Expliciltly listing all dependencies --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8b7dc410..b4f40231 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ numpy PyYAML>=4.0 future -hist[plot] # Required for intervals +hist +scipy hepdata-validator>=0.3.5 From 6ae8b034102d31c9159c9d1671b99a801aa57657 Mon Sep 17 00:00:00 2001 From: Yi-Mu Chen Date: Mon, 30 Oct 2023 16:27:25 +0100 Subject: [PATCH 07/12] Fixed pylint for test scripts --- tests/test_histread.py | 77 ++++++++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 29 deletions(-) diff --git a/tests/test_histread.py b/tests/test_histread.py index 2e6d1742..93607563 100644 --- a/tests/test_histread.py +++ b/tests/test_histread.py @@ -4,7 +4,7 @@ import numpy as np import hist import hist.intervals -from hepdata_lib.hist_utils import * +from hepdata_lib.hist_utils import read_hist, hist_as_variable, create_hist_base_table class TestHistUtils(TestCase): @@ -41,7 +41,10 @@ class TestHistUtils(TestCase): ) def test_default_read(self): - # Default read with no projection + """ + Ensure basic readout function generates arrays with compatible + dimensions with given the base-level histogram. 
+ """ try: readout = read_hist(TestHistUtils.base_hist) except: @@ -51,34 +54,46 @@ def test_default_read(self): self.assertTrue(len(readout["dataset"]) == len(readout["flavor"])) self.assertTrue(len(readout["dataset"]) == len(readout["eta"])) self.assertTrue(len(readout["dataset"]) == len(readout["pt"])) + self.doCleanups() def test_projection_read(self): + """ + Ensure basic readout function generates arrays with compatible + dimensions with histogram slicing operations. + """ # Default read with simple projection try: - r1 = read_hist(TestHistUtils.base_hist[{"dataset": "data", "flavor": sum}]) - r2 = read_hist(TestHistUtils.base_hist[{"dataset": "QCD", "flavor": 0j}]) + read1 = read_hist( + TestHistUtils.base_hist[{"dataset": "data", "flavor": sum}] + ) + read2 = read_hist(TestHistUtils.base_hist[{"dataset": "QCD", "flavor": 0j}]) except: self.fail("Histogram reading raised an unexpected exception.") # Checking dimension compatibility - self.assertTrue(len(r1["eta"]) == len(r1["pt"])) - self.assertTrue(len(r1["eta"]) == len(r2["pt"])) - self.assertTrue(np.all(r1["eta"] == r2["eta"])) - self.assertTrue(np.all(r1["pt"] == r2["pt"])) + self.assertTrue(len(read1["eta"]) == len(read1["pt"])) + self.assertTrue(len(read1["eta"]) == len(read2["pt"])) + self.assertTrue(np.all(read1["eta"] == read2["eta"])) + self.assertTrue(np.all(read1["pt"] == read2["pt"])) # Clean up self.doCleanups() def test_uncertainty_generation(self): - h = TestHistUtils.base_hist[{"dataset": "data"}] + """ + Exhaustively testing automatic variable generation with all defined + uncertainty formats + """ + + d_h = TestHistUtils.base_hist[{"dataset": "data"}] q_h = TestHistUtils.base_hist[{"dataset": "QCD"}] t_h = TestHistUtils.base_hist[{"dataset": "ttbar"}] - h_arr = h.view(flow=True)["value"].flatten() - unc_arr = np.ones_like(h_arr) * np.random.random(size=h_arr.shape[0]) + d_arr = d_h.view(flow=True)["value"].flatten() + unc_arr = np.ones_like(d_arr) * np.random.random(size=d_arr.shape[0]) 
try: - r = hist_as_variable( + auto_var = hist_as_variable( "testing", - h, + d_h, flow=True, uncertainty={ "symmetric stat": "poisson_sym", @@ -94,41 +109,45 @@ def test_uncertainty_generation(self): except: self.fail("Unexpected exception of automatic uncertainty generation.") - def check_val(a, b): - return self.assertTrue(np.all(np.isclose(a, b) | np.isnan(a) | np.isnan(b))) + def check_val(arr1, arr2): + return self.assertTrue( + np.all(np.isclose(arr1, arr2) | np.isnan(arr1) | np.isnan(arr2)) + ) - h_arr = h.view(flow=True)["value"].flatten() - check_val(r.values, h_arr) + check_val(auto_var.values, d_arr) # Symmetric Poisson - check_val(r.uncertainties[0].values, np.sqrt(h_arr)) + check_val(auto_var.uncertainties[0].values, np.sqrt(d_arr)) # Asymmetric Poisson - l, u = hist.intervals.poisson_interval(h_arr) - l, u = l - h_arr, u - h_arr - check_val(r.uncertainties[1].values, list(zip(l, u))) + _l, _u = hist.intervals.poisson_interval(d_arr) + _l, _u = _l - d_arr, _u - d_arr + check_val(auto_var.uncertainties[1].values, list(zip(_l, _u))) # Flat uncertainties - check_val(r.uncertainties[2].values, 1.5) - check_val(r.uncertainties[3].values, (1.5, 2.2)) + check_val(auto_var.uncertainties[2].values, 1.5) + check_val(auto_var.uncertainties[3].values, (1.5, 2.2)) # Array defined uncertainties - check_val(r.uncertainties[4].values, unc_arr) - check_val(r.uncertainties[5].values, list(zip(-0.8 * unc_arr, unc_arr))) + check_val(auto_var.uncertainties[4].values, unc_arr) + check_val(auto_var.uncertainties[5].values, list(zip(-0.8 * unc_arr, unc_arr))) # Histogram defined uncertainties q_arr = q_h.view(flow=True)["value"].flatten() t_arr = t_h.view(flow=True)["value"].flatten() - check_val(r.uncertainties[6].values, q_arr) - check_val(r.uncertainties[7].values, list(zip(q_arr, t_arr))) + check_val(auto_var.uncertainties[6].values, q_arr) + check_val(auto_var.uncertainties[7].values, list(zip(q_arr, t_arr))) self.doCleanups() def test_table_generation(self): + """ + 
Base table generation with base histogram + """ _rename_ = { "dataset": "Data set", "flavor": "Jet flavor", - "eta": "Jet $\eta$", - "pt": "Jet $p_{T}$", + "eta": r"Jet $\eta$", + "pt": r"Jet $p_{T}$", } _units_ = {"pt": "GeV"} try: From a9783ee71aef22aba8d429da16d46a8b24015f3a Mon Sep 17 00:00:00 2001 From: Yi-Mu Chen Date: Mon, 30 Oct 2023 16:55:11 +0100 Subject: [PATCH 08/12] Disabling specific pylint --- tests/test_histread.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_histread.py b/tests/test_histread.py index 93607563..27de29fa 100644 --- a/tests/test_histread.py +++ b/tests/test_histread.py @@ -47,8 +47,10 @@ def test_default_read(self): """ try: readout = read_hist(TestHistUtils.base_hist) + # pylint: disable=W0702 except: self.fail("Histogram reading raised an unexpected exception.") + # pylint: enable=W0702 # Checking dimension compatibility self.assertTrue(len(readout["dataset"]) == len(readout["flavor"])) @@ -67,8 +69,11 @@ def test_projection_read(self): TestHistUtils.base_hist[{"dataset": "data", "flavor": sum}] ) read2 = read_hist(TestHistUtils.base_hist[{"dataset": "QCD", "flavor": 0j}]) + # pylint: disable=W0702 except: self.fail("Histogram reading raised an unexpected exception.") + # pylint: enable=W0702 + # Checking dimension compatibility self.assertTrue(len(read1["eta"]) == len(read1["pt"])) self.assertTrue(len(read1["eta"]) == len(read2["pt"])) @@ -106,8 +111,10 @@ def test_uncertainty_generation(self): "asymmetric histogram": (q_h, t_h), }, ) + # pylint: disable=W0702 except: self.fail("Unexpected exception of automatic uncertainty generation.") + # pylint: enable=W0702 def check_val(arr1, arr2): return self.assertTrue( @@ -157,8 +164,10 @@ def test_table_generation(self): axes_rename=_rename_, axes_units=_units_, ) + # pylint: disable=W0702 except: self.fail("Unexpected exception of table generation.") + # pylint: enable=W0702 readout = read_hist(TestHistUtils.base_hist) From 
bab436477bf20fd3a190c30913889e122516ad42 Mon Sep 17 00:00:00 2001 From: Yi-Mu Chen Date: Mon, 30 Oct 2023 20:59:50 +0100 Subject: [PATCH 09/12] Support for all standard hist.storage methods. Testing for all storage types and removing exception handling --- hepdata_lib/hist_utils.py | 12 +++- tests/test_histread.py | 129 +++++++++++++++++++++++--------------- 2 files changed, 88 insertions(+), 53 deletions(-) diff --git a/hepdata_lib/hist_utils.py b/hepdata_lib/hist_utils.py index e8bbc447..3e28af03 100644 --- a/hepdata_lib/hist_utils.py +++ b/hepdata_lib/hist_utils.py @@ -38,12 +38,20 @@ def read_hist(histo: hist.Hist, flow: bool = False) -> Dict[str, numpy.ndarray]: _storage_keys = { hist.storage.Weight: ["value", "variance"], - hist.storage.Mean: ["value", ""], + hist.storage.Mean: ["value", "count", "_sum_of_deltas_squared"], + hist.storage.WeightedMean: [ + "value", + "sum_of_weights", + "sum_of_weights_squared", + "_sum_of_weighted_deltas_squared", + ], } - if ( + if ( # Single value storages histo.storage_type is hist.storage.Int64 or histo.storage_type is hist.storage.Double + or histo.storage_type is hist.storage.AtomicInt64 + or histo.storage_type is hist.storage.Unlimited ): readout["hist_value"] = view.flatten() elif histo.storage_type in _storage_keys: diff --git a/tests/test_histread.py b/tests/test_histread.py index 27de29fa..a02dd5e0 100644 --- a/tests/test_histread.py +++ b/tests/test_histread.py @@ -43,19 +43,55 @@ class TestHistUtils(TestCase): def test_default_read(self): """ Ensure basic readout function generates arrays with compatible - dimensions with given the base-level histogram. + dimensions with given the base-level histogram. Also checking alternate + binning methods. 
""" - try: - readout = read_hist(TestHistUtils.base_hist) - # pylint: disable=W0702 - except: - self.fail("Histogram reading raised an unexpected exception.") - # pylint: enable=W0702 + # Always test the base histogram + readout = read_hist(TestHistUtils.base_hist) # Checking dimension compatibility - self.assertTrue(len(readout["dataset"]) == len(readout["flavor"])) - self.assertTrue(len(readout["dataset"]) == len(readout["eta"])) - self.assertTrue(len(readout["dataset"]) == len(readout["pt"])) + self.assertTrue(len(readout["hist_value"]) == len(readout["hist_variance"])) + self.assertTrue(len(readout["hist_value"]) == len(readout["dataset"])) + self.assertTrue(len(readout["hist_value"]) == len(readout["flavor"])) + self.assertTrue(len(readout["hist_value"]) == len(readout["eta"])) + self.assertTrue(len(readout["hist_value"]) == len(readout["pt"])) + self.assertTrue(len(readout.keys()) == 6) + + ## Histograms with alternate types + # Simple double + h_double = hist.Hist(hist.axis.Regular(10, 0, 1, name="x")) # Double + readout = read_hist(h_double) + self.assertTrue(len(readout["hist_value"]) == len(readout["x"])) + self.assertTrue(len(readout.keys()) == 2) + + h_mean = hist.Hist( + hist.axis.Regular(10, 0, 1, name="x"), storage=hist.storage.Mean() + ) + readout = read_hist(h_mean) + self.assertTrue(len(readout["hist_value"]) == len(readout["x"])) + self.assertTrue(len(readout["hist_value"]) == len(readout["hist_count"])) + self.assertTrue( + len(readout["hist_value"]) == len(readout["hist__sum_of_deltas_squared"]) + ) + self.assertTrue(len(readout.keys()) == 4) + + h_mean = hist.Hist( + hist.axis.Regular(10, 0, 1, name="x"), storage=hist.storage.WeightedMean() + ) + readout = read_hist(h_mean) + self.assertTrue(len(readout["hist_value"]) == len(readout["x"])) + self.assertTrue( + len(readout["hist_value"]) == len(readout["hist_sum_of_weights"]) + ) + self.assertTrue( + len(readout["hist_value"]) == len(readout["hist_sum_of_weights_squared"]) + ) + 
self.assertTrue( + len(readout["hist_value"]) + == len(readout["hist__sum_of_weighted_deltas_squared"]) + ) + self.assertTrue(len(readout.keys()) == 5) + self.doCleanups() def test_projection_read(self): @@ -63,16 +99,9 @@ def test_projection_read(self): Ensure basic readout function generates arrays with compatible dimensions with histogram slicing operations. """ - # Default read with simple projection - try: - read1 = read_hist( - TestHistUtils.base_hist[{"dataset": "data", "flavor": sum}] - ) - read2 = read_hist(TestHistUtils.base_hist[{"dataset": "QCD", "flavor": 0j}]) - # pylint: disable=W0702 - except: - self.fail("Histogram reading raised an unexpected exception.") - # pylint: enable=W0702 + # Default read with slicing projection + read1 = read_hist(TestHistUtils.base_hist[{"dataset": "data", "flavor": sum}]) + read2 = read_hist(TestHistUtils.base_hist[{"dataset": "QCD", "flavor": 0j}]) # Checking dimension compatibility self.assertTrue(len(read1["eta"]) == len(read1["pt"])) @@ -80,6 +109,13 @@ def test_projection_read(self): self.assertTrue(np.all(read1["eta"] == read2["eta"])) self.assertTrue(np.all(read1["pt"] == read2["pt"])) + # Profiling histogram conversion + read3 = read_hist( + TestHistUtils.base_hist[{"dataset": sum, "flavor": sum}].profile("eta") + ) + self.assertTrue(len(read3["pt"]) == len(read3["hist_value"])) + self.assertTrue(len(read3.keys()) == 4) + # Clean up self.doCleanups() @@ -95,26 +131,21 @@ def test_uncertainty_generation(self): d_arr = d_h.view(flow=True)["value"].flatten() unc_arr = np.ones_like(d_arr) * np.random.random(size=d_arr.shape[0]) - try: - auto_var = hist_as_variable( - "testing", - d_h, - flow=True, - uncertainty={ - "symmetric stat": "poisson_sym", - "asymmetric stat": "poisson_asym", - "symmetric float": 1.5, - "asymmetric float": (1.5, 2.2), - "symmetric array": unc_arr, - "asymmetric array": (-0.8 * unc_arr, unc_arr), - "symmetric histogram": q_h, - "asymmetric histogram": (q_h, t_h), - }, - ) - # pylint: 
disable=W0702 - except: - self.fail("Unexpected exception of automatic uncertainty generation.") - # pylint: enable=W0702 + auto_var = hist_as_variable( + "testing", + d_h, + flow=True, + uncertainty={ + "symmetric stat": "poisson_sym", + "asymmetric stat": "poisson_asym", + "symmetric float": 1.5, + "asymmetric float": (1.5, 2.2), + "symmetric array": unc_arr, + "asymmetric array": (-0.8 * unc_arr, unc_arr), + "symmetric histogram": q_h, + "asymmetric histogram": (q_h, t_h), + }, + ) def check_val(arr1, arr2): return self.assertTrue( @@ -157,17 +188,13 @@ def test_table_generation(self): "pt": r"Jet $p_{T}$", } _units_ = {"pt": "GeV"} - try: - table = create_hist_base_table( - "my_table", - TestHistUtils.base_hist, - axes_rename=_rename_, - axes_units=_units_, - ) - # pylint: disable=W0702 - except: - self.fail("Unexpected exception of table generation.") - # pylint: enable=W0702 + + table = create_hist_base_table( + "my_table", + TestHistUtils.base_hist, + axes_rename=_rename_, + axes_units=_units_, + ) readout = read_hist(TestHistUtils.base_hist) From da1740a3ebd0454f57009af1f9fdc7c65f7b9e85 Mon Sep 17 00:00:00 2001 From: Yi-Mu Chen Date: Fri, 3 Nov 2023 10:54:26 +0100 Subject: [PATCH 10/12] Additional edge cases handling --- hepdata_lib/hist_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hepdata_lib/hist_utils.py b/hepdata_lib/hist_utils.py index 3e28af03..a3a90bed 100644 --- a/hepdata_lib/hist_utils.py +++ b/hepdata_lib/hist_utils.py @@ -150,7 +150,7 @@ def _make_unc_array(unc_val): if isinstance(unc_val, float): return numpy.ones_like(readout["hist_value"]) * unc_val if isinstance(unc_val, numpy.ndarray): - return unc_val + return unc_val.flatten() if isinstance(unc_val, hist.Hist): return read_hist(unc_val, flow=flow)["hist_value"] raise NotImplementedError(f"Unknown uncertainty format! 
{type(unc_val)}") @@ -214,6 +214,9 @@ def _make_poisson_unc_array( _sw2 = readout["hist_variance"] _lo, _up = hist.intervals.poisson_interval(_sw, _sw2) _lo, _up = _lo - _sw, _up - _sw + # Suppressing the NAN for zero-entry events + _lo = numpy.nan_to_num(_lo, nan=0.0) + _up = numpy.nan_to_num(_up, nan=0.0) unc_arr = list(zip(_lo, _up)) return unc_arr From 11ae27513c050a1ef30b963a9cadf7bee940711e Mon Sep 17 00:00:00 2001 From: Yi-Mu Chen Date: Fri, 3 Nov 2023 13:29:21 +0100 Subject: [PATCH 11/12] Generalizing storage method detector to work for any input (compatibility for hist==2.4.0 for python==3.6) --- hepdata_lib/hist_utils.py | 15 +++------------ tests/test_histread.py | 2 +- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/hepdata_lib/hist_utils.py b/hepdata_lib/hist_utils.py index a3a90bed..9b111eaa 100644 --- a/hepdata_lib/hist_utils.py +++ b/hepdata_lib/hist_utils.py @@ -47,20 +47,11 @@ def read_hist(histo: hist.Hist, flow: bool = False) -> Dict[str, numpy.ndarray]: ], } - if ( # Single value storages - histo.storage_type is hist.storage.Int64 - or histo.storage_type is hist.storage.Double - or histo.storage_type is hist.storage.AtomicInt64 - or histo.storage_type is hist.storage.Unlimited - ): + if view.dtype.names is None: # Single value storages readout["hist_value"] = view.flatten() - elif histo.storage_type in _storage_keys: - for key in _storage_keys[histo.storage_type]: - readout["hist_" + key] = view[key].flatten() else: - raise NotImplementedError( - f"Storage type {histo.storage_type} currently not implemented" - ) + for var_name in view.dtype.names: + readout["hist_" + var_name] = view[var_name].flatten() return readout diff --git a/tests/test_histread.py b/tests/test_histread.py index a02dd5e0..44d80233 100644 --- a/tests/test_histread.py +++ b/tests/test_histread.py @@ -157,7 +157,7 @@ def check_val(arr1, arr2): check_val(auto_var.uncertainties[0].values, np.sqrt(d_arr)) # Asymmetric Poisson - _l, _u = 
hist.intervals.poisson_interval(d_arr) + _l, _u = hist.intervals.poisson_interval(d_arr, d_arr) _l, _u = _l - d_arr, _u - d_arr check_val(auto_var.uncertainties[1].values, list(zip(_l, _u))) From f7dcb33090fb327c107b0a49f2eb80456a01a607 Mon Sep 17 00:00:00 2001 From: Yi-Mu Chen Date: Tue, 7 Nov 2023 12:06:27 +0100 Subject: [PATCH 12/12] Added additional handling for integer bins --- hepdata_lib/hist_utils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hepdata_lib/hist_utils.py b/hepdata_lib/hist_utils.py index 9b111eaa..2ec99608 100644 --- a/hepdata_lib/hist_utils.py +++ b/hepdata_lib/hist_utils.py @@ -77,12 +77,17 @@ def _get_histaxis_array(axis, flow: bool) -> numpy.ndarray: entries.append("__UNDETERMINED__") elif isinstance(axis, hist.axis.IntCategory): entries.append(entries[-1] + 1) + elif isinstance(axis, hist.axis.Integer): + entries.append(numpy.inf) else: entries.append((axis.edges[-1], numpy.inf)) ## Adding underflow bin if flow and axis.traits.underflow: - entries = [(-numpy.inf, axis.edges[0])] + entries + if isinstance(axis,hist.axis.Integer): + entries = [-numpy.inf] + entries + else: + entries = [(-numpy.inf, axis.edges[0])] + entries ## Converting to numpy array if axis.traits.continuous: