Merge pull request #79 from naved001/refactor/invoice_class_2

Some updates to the pod class
CCI-MOC · Oct 16, 2024 · c86396e · c86396e
2 parents c9768a8 + 5419778
commit c86396e
Show file tree

Hide file tree

Showing 5 changed files with 175 additions and 182 deletions.
diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml
@@ -14,7 +14,7 @@ jobs:
       - name: Set up python
         uses: actions/setup-python@v4
         with:
-          python-version: '3.10'
+          python-version: '3.11'
 
       - name: Install dependencies
         run: |

diff --git a/openshift_metrics/invoice.py b/openshift_metrics/invoice.py
@@ -3,6 +3,7 @@
 from collections import namedtuple
 from typing import List
 from decimal import Decimal, ROUND_HALF_UP
+import datetime
 
 # GPU types
 GPU_A100 = "NVIDIA-A100-40GB"
@@ -28,9 +29,11 @@
 
 ServiceUnit = namedtuple("ServiceUnit", ["su_type", "su_count", "determinig_resource"])
 
+
 @dataclass
 class Pod:
     """Object that represents a pod"""
+
     pod_name: str
     namespace: str
     start_time: int
@@ -43,20 +46,19 @@ class Pod:
     node_hostname: str
     node_model: str
 
-    @staticmethod
-    def get_service_unit(cpu_count, memory_count, gpu_count, gpu_type, gpu_resource) -> ServiceUnit:
+    def get_service_unit(self) -> ServiceUnit:
         """
         Returns the type of service unit, the count, and the determining resource
         """
         su_type = SU_UNKNOWN
         su_count = 0
 
         # pods that requested a specific GPU but weren't scheduled may report 0 GPU
-        if gpu_resource is not None and gpu_count == 0:
+        if self.gpu_resource is not None and self.gpu_request == 0:
             return ServiceUnit(SU_UNKNOWN_GPU, 0, "GPU")
 
         # pods in weird states
-        if cpu_count == 0 or memory_count == 0:
+        if self.cpu_request == 0 or self.memory_request == 0:
             return ServiceUnit(SU_UNKNOWN, 0, "CPU")
 
         known_gpu_su = {
@@ -82,18 +84,18 @@ def get_service_unit(cpu_count, memory_count, gpu_count, gpu_type, gpu_resource)
             SU_UNKNOWN: {"gpu": -1, "cpu": 1, "ram": 1},
         }
 
-        if gpu_resource is None and gpu_count == 0:
+        if self.gpu_resource is None and self.gpu_request == 0:
             su_type = SU_CPU
-        elif gpu_type is not None and gpu_resource == WHOLE_GPU:
-            su_type = known_gpu_su.get(gpu_type, SU_UNKNOWN_GPU)
-        elif gpu_type == GPU_A100_SXM4:  # for MIG GPU of type A100_SXM4
-            su_type = A100_SXM4_MIG.get(gpu_resource, SU_UNKNOWN_MIG_GPU)
+        elif self.gpu_type is not None and self.gpu_resource == WHOLE_GPU:
+            su_type = known_gpu_su.get(self.gpu_type, SU_UNKNOWN_GPU)
+        elif self.gpu_type == GPU_A100_SXM4:  # for MIG GPU of type A100_SXM4
+            su_type = A100_SXM4_MIG.get(self.gpu_resource, SU_UNKNOWN_MIG_GPU)
         else:
             return ServiceUnit(SU_UNKNOWN_GPU, 0, "GPU")
 
-        cpu_multiplier = cpu_count / su_config[su_type]["cpu"]
-        gpu_multiplier = gpu_count / su_config[su_type]["gpu"]
-        memory_multiplier = memory_count / su_config[su_type]["ram"]
+        cpu_multiplier = self.cpu_request / su_config[su_type]["cpu"]
+        gpu_multiplier = self.gpu_request / su_config[su_type]["gpu"]
+        memory_multiplier = self.memory_request / su_config[su_type]["ram"]
 
         su_count = max(cpu_multiplier, gpu_multiplier, memory_multiplier)
 
@@ -114,6 +116,45 @@ def get_runtime(self) -> Decimal:
         """Return runtime eligible for billing in hours"""
         return Decimal(self.duration) / 3600
 
+    @property
+    def end_time(self) -> int:
+        return self.start_time + self.duration
+
+    def generate_pod_row(self):
+        """
+        This returns a row to represent pod data.
+        It converts the epoch timestamps to UTC datetime strings so they're more readable.
+        Additionally, some metrics are rounded for readability.
+        """
+        su_type, su_count, determining_resource = self.get_service_unit()
+        start_time = datetime.datetime.fromtimestamp(
+            self.start_time, datetime.UTC
+        ).strftime("%Y-%m-%dT%H:%M:%S")
+        end_time = datetime.datetime.fromtimestamp(
+            self.end_time, datetime.UTC
+        ).strftime("%Y-%m-%dT%H:%M:%S")
+        memory_request = self.memory_request.quantize(
+            Decimal(".0001"), rounding=ROUND_HALF_UP
+        )
+        runtime = self.get_runtime().quantize(Decimal(".0001"), rounding=ROUND_HALF_UP)
+        return [
+            self.namespace,
+            start_time,
+            end_time,
+            runtime,
+            self.pod_name,
+            self.cpu_request,
+            self.gpu_request,
+            self.gpu_type,
+            self.gpu_resource,
+            self.node_hostname,
+            self.node_model,
+            memory_request,
+            determining_resource,
+            su_type,
+            su_count,
+        ]
+
 
 @dataclass()
 class Rates:
@@ -150,13 +191,7 @@ class ProjectInvoce:
 
     def add_pod(self, pod: Pod) -> None:
         """Aggregate a pods data"""
-        su_type, su_count, _ = Pod.get_service_unit(
-            cpu_count=pod.cpu_request,
-            memory_count=pod.memory_request,
-            gpu_count=pod.gpu_request,
-            gpu_type=pod.gpu_type,
-            gpu_resource=pod.gpu_resource,
-        )
+        su_type, su_count, _ = pod.get_service_unit()
         duration_in_hours = pod.get_runtime()
         self.su_hours[su_type] += su_count * duration_in_hours
 

diff --git a/openshift_metrics/tests/test_metrics_processor.py b/openshift_metrics/tests/test_metrics_processor.py
@@ -1,5 +1,5 @@
 from unittest import TestCase
-from openshift_metrics import metrics_processor, utils
+from openshift_metrics import metrics_processor, invoice
 
 
 class TestMergeMetrics(TestCase):
@@ -490,43 +490,43 @@ def test_condense_metrics_with_changing_gpu(self):
                             "cpu": 1,
                             "mem": 4,
                             "gpu_request": 1,
-                            "gpu_type": utils.GPU_V100,
+                            "gpu_type": invoice.GPU_V100,
                         },
                         2700: {
                             "cpu": 1,
                             "mem": 4,
                             "gpu_request": 1,
-                            "gpu_type": utils.GPU_V100,
+                            "gpu_type": invoice.GPU_V100,
                         },
                         3600: {  # type of GPU is changed
                             "cpu": 1,
                             "mem": 4,
                             "gpu_request": 1,
-                            "gpu_type": utils.GPU_A100_SXM4,
+                            "gpu_type": invoice.GPU_A100_SXM4,
                         },
                         4500: {
                             "cpu": 1,
                             "mem": 4,
                             "gpu_request": 1,
-                            "gpu_type": utils.GPU_A100_SXM4,
+                            "gpu_type": invoice.GPU_A100_SXM4,
                         },
                         5400: {
                             "cpu": 1,
                             "mem": 4,
                             "gpu_request": 1,
-                            "gpu_type": utils.GPU_A100_SXM4,
+                            "gpu_type": invoice.GPU_A100_SXM4,
                         },
                         6300: {  # count of GPU is changed
                             "cpu": 1,
                             "mem": 4,
                             "gpu_request": 3,
-                            "gpu_type": utils.GPU_A100_SXM4,
+                            "gpu_type": invoice.GPU_A100_SXM4,
                         },
                         7200: {
                             "cpu": 1,
                             "mem": 4,
                             "gpu_request": 3,
-                            "gpu_type": utils.GPU_A100_SXM4,
+                            "gpu_type": invoice.GPU_A100_SXM4,
                         },
                         8100: {  # no longer using GPUs
                             "cpu": 1,
@@ -546,21 +546,21 @@ def test_condense_metrics_with_changing_gpu(self):
                             "mem": 4,
                             "duration": 1800,
                             "gpu_request": 1,
-                            "gpu_type": utils.GPU_V100,
+                            "gpu_type": invoice.GPU_V100,
                         },
                         3600: {
                             "cpu": 1,
                             "mem": 4,
                             "duration": 2700,
                             "gpu_request": 1,
-                            "gpu_type": utils.GPU_A100_SXM4,
+                            "gpu_type": invoice.GPU_A100_SXM4,
                         },
                         6300: {
                             "cpu": 1,
                             "mem": 4,
                             "duration": 1800,
                             "gpu_request": 3,
-                            "gpu_type": utils.GPU_A100_SXM4,
+                            "gpu_type": invoice.GPU_A100_SXM4,
                         },
                         8100: {
                             "cpu": 1,