From 31110d0d23336a543e8ae93b513f54b922b87a95 Mon Sep 17 00:00:00 2001
From: Bing Li <104096768+bingli621@users.noreply.github.com>
Date: Mon, 28 Oct 2024 10:02:52 -0400
Subject: [PATCH] added norm columns to ScanData class

---
 src/tavi/data/scan_data.py | 106 ++++++++++++++++++++++++-------------
 tests/test_scan_data.py    |  97 ++++++++++++++++++++++++++++++---
 2 files changed, 159 insertions(+), 44 deletions(-)

diff --git a/src/tavi/data/scan_data.py b/src/tavi/data/scan_data.py
index c1eb947..6c64c94 100644
--- a/src/tavi/data/scan_data.py
+++ b/src/tavi/data/scan_data.py
@@ -1,20 +1,27 @@
 # -*- coding: utf-8 -*-
 
+import math
+from typing import Optional
+
 import numpy as np
 
 
 class ScanData1D(object):
     """1D scan data ready to be plot, with ooptions to renormalize or rebin"""
 
-    ZERO = 1e-6
-
-    def __init__(self, x: np.ndarray, y: np.ndarray) -> None:
+    def __init__(
+        self,
+        x: np.ndarray,
+        y: np.ndarray,
+        norm: Optional[np.ndarray] = None,
+    ) -> None:
 
         # ind = np.argsort(x)
         # self.x = x[ind]
         # self.y = y[ind]
         self.x = x
         self.y = y
+        self.norm = norm
         self.err = np.sqrt(y)
         # self._ind = ind
         self.label = ""
@@ -92,11 +99,17 @@ def __sub__(self, other):
         scan_data_1d.err = np.sqrt(self.err**2 + other.err**2)
         return scan_data_1d
 
-    def renorm(self, norm_col: np.ndarray, norm_val: float = 1.0):
-        """Renormalized to norm_val"""
-        # norm_col = norm_col[self._ind]
-        self.y = self.y / norm_col * norm_val
-        self.err = self.err / norm_col * norm_val
+    def renorm(self, norm: Optional[np.ndarray] = None, val: float = 1.0):
+        """Renormalize y and err to val, dividing by norm or, if None, by self.norm."""
+
+        if norm is not None:
+            norm_col = norm
+        elif self.norm is not None:
+            norm_col = self.norm
+        else:
+            raise ValueError("Normalization column cannot be None.")
+        self.y = self.y / norm_col * val
+        self.err = self.err / norm_col * val
 
     def rebin_tol(self, rebin_params: tuple, weight_col: np.ndarray):
         """Rebin with tolerance"""
@@ -105,22 +118,28 @@ def rebin_tol(self, rebin_params: tuple, weight_col: np.ndarray):
         rebin_min = np.min(self.x) if rebin_min is None else rebin_min
         rebin_max = np.max(self.x) if rebin_max is None else rebin_max
 
-        x_grid = np.arange(rebin_min - rebin_step / 2, rebin_max + rebin_step * 3 / 2, rebin_step)
-        x = np.zeros_like(x_grid)
-        y = np.zeros_like(x_grid)
-        counts = np.zeros_like(x_grid)
-        weights = np.zeros_like(x_grid)
+        ZERO = rebin_step / 100  # helps with the rounding error
+        num = math.floor((rebin_max + ZERO - rebin_min) / rebin_step) + 1
 
-        for i, x0 in enumerate(self.x):
-            idx = np.nanargmax(x_grid + rebin_step / 2 + ScanData1D.ZERO >= x0)
-            y[idx] += self.y[i]
-            x[idx] += self.x[i] * weight_col[i]
-            weights[idx] += weight_col[i]
-            counts[idx] += 1
+        x_boundary = np.linspace(rebin_min - rebin_step / 2, rebin_min + rebin_step * (num - 1 / 2), num + 1)
+        x = np.linspace(rebin_min, rebin_min + rebin_step * (num - 1), num)
+        y = np.zeros_like(x)
+        counts = np.zeros_like(x)
+        weights = np.zeros_like(x)
+        x = np.zeros_like(y)
 
-        self.err = np.sqrt(y[1:-2]) / counts[1:-2]
-        self.y = y[1:-2] / counts[1:-2]
-        self.x = x[1:-2] / weights[1:-2]
+        for i, x0 in enumerate(self.x):
+            # index of the first bin boundary above x0 (within ZERO); 0 means x0 falls outside the grid
+            idx = np.nanargmax(x_boundary - ZERO > x0)
+            if idx > 0:  # ignore first and last bin box
+                y[idx - 1] += self.y[i]
+                x[idx - 1] += self.x[i] * weight_col[i]
+                weights[idx - 1] += weight_col[i]
+                counts[idx - 1] += 1
+
+        self.err = np.sqrt(y) / counts
+        self.y = y / counts
+        self.x = x / weights
 
     def rebin_tol_renorm(self, rebin_params: tuple, norm_col: np.ndarray, norm_val: float = 1.0):
         """Rebin with tolerance and renormalize"""
@@ -128,6 +147,8 @@ def rebin_tol_renorm(self, rebin_params: tuple, norm_col: np.ndarray, norm_val:
         rebin_min = np.min(self.x) if rebin_min is None else rebin_min
         rebin_max = np.max(self.x) if rebin_max is None else rebin_max
 
+        ZERO = rebin_step / 100  # helps with the rounding error
+
         x_grid = np.arange(rebin_min - rebin_step / 2, rebin_max + rebin_step * 3 / 2, rebin_step)
         x = np.zeros_like(x_grid)
         y = np.zeros_like(x_grid)
@@ -136,7 +157,7 @@ def rebin_tol_renorm(self, rebin_params: tuple, norm_col: np.ndarray, norm_val:
         # norm_col = norm_col[self._ind]
 
         for i, x0 in enumerate(self.x):
-            idx = np.nanargmax(x_grid + rebin_step / 2 + ScanData1D.ZERO >= x0)
+            idx = np.nanargmax(x_grid + rebin_step / 2 + ZERO >= x0)
             y[idx] += self.y[i]
             x[idx] += self.x[i] * norm_col[i]
             counts[idx] += norm_col[i]
@@ -151,18 +172,25 @@ def rebin_grid(self, rebin_params: tuple):
         rebin_min = np.min(self.x) if rebin_min is None else rebin_min
         rebin_max = np.max(self.x) if rebin_max is None else rebin_max
 
-        x = np.arange(rebin_min - rebin_step / 2, rebin_max + rebin_step * 3 / 2, rebin_step)
+        ZERO = rebin_step / 100  # helps with the rounding error
+        num = math.floor((rebin_max + ZERO - rebin_min) / rebin_step) + 1
+
+        x_boundary = np.linspace(rebin_min - rebin_step / 2, rebin_min + rebin_step * (num - 1 / 2), num + 1)
+        x = np.linspace(rebin_min, rebin_min + rebin_step * (num - 1), num)
         y = np.zeros_like(x)
         counts = np.zeros_like(x)
 
         for i, x0 in enumerate(self.x):
-            idx = np.nanargmax(x + rebin_step / 2 + ScanData1D.ZERO >= x0)
-            y[idx] += self.y[i]
-            counts[idx] += 1
 
-        self.x = x[1:-2]
-        self.err = np.sqrt(y[1:-2]) / counts[1:-2]
-        self.y = y[1:-2] / counts[1:-2]
+            # index of the first bin boundary above x0 (within ZERO); 0 means x0 falls outside the grid
+            idx = np.nanargmax(x_boundary - ZERO > x0)
+            if idx > 0:  # ignore first and last bin box
+                y[idx - 1] += self.y[i]
+                counts[idx - 1] += 1
+
+        self.x = x
+        self.err = np.sqrt(y) / counts
+        self.y = y / counts
 
     def rebin_grid_renorm(self, rebin_params: tuple, norm_col: np.ndarray, norm_val: float = 1.0):
         """Rebin with a regular grid and renormalize"""
@@ -171,20 +199,26 @@ def rebin_grid_renorm(self, rebin_params: tuple, norm_col: np.ndarray, norm_val:
         rebin_min = np.min(self.x) if rebin_min is None else rebin_min
         rebin_max = np.max(self.x) if rebin_max is None else rebin_max
 
-        x = np.arange(rebin_min - rebin_step / 2, rebin_max + rebin_step * 3 / 2, rebin_step)
+        ZERO = rebin_step / 100  # helps with the rounding error
+        num = math.floor((rebin_max + ZERO - rebin_min) / rebin_step) + 1
+
+        x_boundary = np.linspace(rebin_min - rebin_step / 2, rebin_min + rebin_step * (num - 1 / 2), num + 1)
+        x = np.linspace(rebin_min, rebin_min + rebin_step * (num - 1), num)
         y = np.zeros_like(x)
         counts = np.zeros_like(x)
 
         # norm_col = norm_col[self._ind]
 
         for i, x0 in enumerate(self.x):  # plus ZERO helps improve precision
-            idx = np.nanargmax(x + rebin_step / 2 + ScanData1D.ZERO >= x0)
-            y[idx] += self.y[i]
-            counts[idx] += norm_col[i]
+            idx = np.nanargmax(x_boundary - ZERO > x0)
 
-        self.x = x[1:-2]
-        self.err = np.sqrt(y[1:-2]) / counts[1:-2] * norm_val
-        self.y = y[1:-2] / counts[1:-2] * norm_val
+            if idx > 0:  # ignore first and last bin box
+                y[idx - 1] += self.y[i]
+                counts[idx - 1] += norm_col[i]
+
+        self.x = x
+        self.err = np.sqrt(y) / counts * norm_val
+        self.y = y / counts * norm_val
 
 
 class ScanData2D(object):
diff --git a/tests/test_scan_data.py b/tests/test_scan_data.py
index 3aae303..d205dc8 100644
--- a/tests/test_scan_data.py
+++ b/tests/test_scan_data.py
@@ -1,4 +1,6 @@
 # -*- coding: utf-8 -*-
+import copy
+
 import numpy as np
 import pytest
 
@@ -7,10 +9,11 @@
 
 @pytest.fixture
 def scans1d():
-    scan0001 = ScanData1D(x=np.array([0, 1, 2]), y=np.array([1, 2, 3]))
+    scan0001 = ScanData1D(x=np.array([0, 1, 2]), y=np.array([1, 2, 3]), norm=np.array([2, 2, 3]))
     scan0002 = ScanData1D(
         x=np.array([15, 15.1, 15.2, 15.3, 15.4, 15.1, 15.2, 15.3, 15.4, 15.5]),
         y=np.array([10, 12, 15, 42, 90, 31, 34, 105, 230, 3]),
+        norm=np.array([2, 2, 2, 2, 2, 5, 5, 5, 5, 5]),
     )
     scan0003 = ScanData1D(x=np.array([0.1, 1.1, 2.1]), y=np.array([1, 1, 1]))
     scan0004 = ScanData1D(x=np.array([-0.9, 0.1, 1.1, 2.1, 3.1]), y=np.array([10, 1, 1, 1, 10]))
@@ -22,14 +25,52 @@ def scans1d():
 
 def test_scan_data_1d_renorm(scans1d):
     scan0001, *_ = scans1d
-    scan0001.renorm(norm_col=np.array([2, 2, 3]), norm_val=2)
+    scan0001.renorm(val=2)  # norm column comes from the fixture (norm=[2, 2, 3])
     assert np.allclose(scan0001.y, [1, 2, 2], atol=1e-3)
     assert np.allclose(scan0001.err, [1, np.sqrt(2), np.sqrt(3) / 3 * 2], atol=1e-3)
 
 
-def test_rebin_grid_renorm(scans1d):
+def test_rebin_grid_boundaries(scans1d):  # rebin_grid on grids wider and narrower than the data
     _, scan0002, *_ = scans1d
 
+    test1 = copy.deepcopy(scan0002)  # rebin_grid overwrites x, y and err in place
+
+    test1.rebin_grid(rebin_params=(14.9, 15.6, 0.1))  # grid extends one bin past the data on each side
+    assert np.allclose(test1.x, np.array([14.9, 15.0, 15.1, 15.2, 15.3, 15.4, 15.5, 15.6]), atol=1e-3)
+    y_exp = np.array([np.nan, 10, (12 + 31) / 2, (15 + 34) / 2, (42 + 105) / 2, (90 + 230) / 2, 3, np.nan])
+    assert np.allclose(test1.y, y_exp, atol=1e-3, equal_nan=True)  # empty bins are 0 / 0 = nan
+    err_exp = np.array(
+        [
+            np.nan,
+            np.sqrt(10),
+            np.sqrt(12 + 31) / 2,
+            np.sqrt(15 + 34) / 2,
+            np.sqrt(42 + 105) / 2,
+            np.sqrt(90 + 230) / 2,
+            np.sqrt(3),
+            np.nan,
+        ]
+    )
+    assert np.allclose(test1.err, err_exp, atol=1e-3, equal_nan=True)
+
+    test2 = copy.deepcopy(scan0002)
+    test2.rebin_grid(rebin_params=(15.1, 15.4, 0.1))  # grid narrower than the data: 15.0 and 15.5 are dropped
+    assert np.allclose(test2.x, np.array([15.1, 15.2, 15.3, 15.4]), atol=1e-3)
+    y_exp = np.array([(12 + 31) / 2, (15 + 34) / 2, (42 + 105) / 2, (90 + 230) / 2])
+    assert np.allclose(test2.y, y_exp, atol=1e-3, equal_nan=True)
+    err_exp = np.array(
+        [
+            np.sqrt(12 + 31) / 2,
+            np.sqrt(15 + 34) / 2,
+            np.sqrt(42 + 105) / 2,
+            np.sqrt(90 + 230) / 2,
+        ]
+    )
+    assert np.allclose(test2.err, err_exp, atol=1e-3, equal_nan=True)
+
+
+def test_rebin_grid(scans1d):
+    _, scan0002, *_ = scans1d
     scan0002.rebin_grid_renorm(
         rebin_params=(15.0, 15.5, 0.2),
         norm_col=np.array([2, 2, 2, 2, 2, 5, 5, 5, 5, 5]),
@@ -37,26 +78,66 @@ def test_rebin_grid_renorm(scans1d):
     )
     assert np.allclose(
         scan0002.x,
-        [15.1, 15.3, 15.5],
+        [15.0, 15.2, 15.4],
         atol=1e-3,
     )
     assert np.allclose(
         scan0002.y,
         [
-            (10 + 12 + 15 + 31 + 34) / (2 + 2 + 2 + 5 + 5) * 4,
+            10 / 2 * 4,
+            (12 + 15 + 31 + 34) / (2 + 2 + 5 + 5) * 4,
             (42 + 90 + 105 + 230) / (2 + 5 + 2 + 5) * 4,
-            (3) / (5) * 4,
         ],
         atol=1e-3,
+        equal_nan=True,
     )
     assert np.allclose(
         scan0002.err,
         [
-            np.sqrt(10 + 12 + 15 + 31 + 34) / (2 + 2 + 2 + 5 + 5) * 4,
+            np.sqrt(10) / 2 * 4,
+            np.sqrt(12 + 15 + 31 + 34) / (2 + 2 + 5 + 5) * 4,
             np.sqrt(42 + 90 + 105 + 230) / (2 + 5 + 2 + 5) * 4,
-            np.sqrt(3) / (5) * 4,
         ],
         atol=1e-3,
+        equal_nan=True,
+    )
+
+
+def test_rebin_tol(scans1d):  # weight_col weights the bin centers only; y and err average over points
+    _, scan0002, *_ = scans1d
+
+    scan0002.rebin_tol(
+        rebin_params=(15.0, 15.5, 0.2),
+        weight_col=np.array([2, 2, 2, 2, 2, 5, 5, 5, 5, 5]),
+    )
+    assert np.allclose(
+        scan0002.x,
+        [
+            15,
+            (2 * 15.1 + 5 * 15.1 + 2 * 15.2 + 5 * 15.2) / (2 + 2 + 5 + 5),
+            (2 * 15.3 + 5 * 15.3 + 2 * 15.4 + 5 * 15.4) / (2 + 2 + 5 + 5),
+        ],
+        atol=1e-3,
+    )
+    assert np.allclose(
+        scan0002.y,
+        [
+            10 / 1,  # rebin_tol divides by the number of points in the bin, not by the weights
+            (12 + 15 + 31 + 34) / 4,
+            (42 + 90 + 105 + 230) / 4,
+        ],
+        atol=1e-3,
+        equal_nan=True,
+    )
+    assert np.allclose(
+        scan0002.err,
+        [
+            np.sqrt(10) / 1,
+            np.sqrt(12 + 15 + 31 + 34) / 4,
+            np.sqrt(42 + 90 + 105 + 230) / 4,
+        ],
+        atol=1e-3,
+        equal_nan=True,
+    )