From f192fb3c05da09d4b5f8cc90ef5740ee9aef07bd Mon Sep 17 00:00:00 2001
From: Ran Ran
Date: Thu, 4 Apr 2024 14:49:28 -0700
Subject: [PATCH] Increase fail score for pylint (#240)

---
 .pylintrc                   |  1 +
 dags/vm_resource.py         | 30 +++++++++++++++++++++++-------
 scripts/code-style.sh       |  2 +-
 xlml/apis/gcp_config.py     |  3 ++-
 xlml/apis/metric_config.py  |  9 ++++-----
 xlml/apis/task.py           | 10 ++++++----
 xlml/apis/test_config.py    |  6 ++++--
 xlml/utils/bigquery.py      |  5 +++--
 xlml/utils/bigquery_test.py |  2 ++
 xlml/utils/gke.py           |  2 ++
 xlml/utils/gpu.py           |  4 ++--
 xlml/utils/metric_test.py   | 15 ++++++++-------
 xlml/utils/tpu.py           |  4 ++--
 xlml/utils/xpk.py           |  4 +---
 14 files changed, 61 insertions(+), 36 deletions(-)

diff --git a/.pylintrc b/.pylintrc
index 5c771e62..d3f6404c 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -308,6 +308,7 @@ redefining-builtins-modules=six,six.moves,past.builtins,future.builtins,functool
 # Logging modules to check that the string format arguments are in logging
 # function parameter format
 logging-modules=logging,absl.logging,tensorflow.io.logging
+disable=logging-fstring-interpolation
 
 
 [SIMILARITIES]
diff --git a/dags/vm_resource.py b/dags/vm_resource.py
index 157b3886..89c40f19 100644
--- a/dags/vm_resource.py
+++ b/dags/vm_resource.py
@@ -14,8 +14,8 @@
 
 """The file for common projects, zone, and runtime versions."""
 
-import enum
 import datetime
+import enum
 
 
 V5_NETWORKS_PREFIX = "projects/tpu-prod-env-automated"
@@ -55,7 +55,8 @@ class Zone(enum.Enum):
   US_CENTRAL1_B = "us-central1-b"
   # reserved v4-8 & v4-32 in cloud-ml-auto-solutions
   US_CENTRAL2_B = "us-central2-b"
-  # reserved/on-demand v2-8 in cloud-ml-auto-solutions & reserved h100 in supercomputer-testing
+  # reserved/on-demand v2-8 in cloud-ml-auto-solutions
+  # & reserved h100 in supercomputer-testing
   US_CENTRAL1_C = "us-central1-c"
   # committed resource for A100
   US_CENTRAL1_F = "us-central1-f"
@@ -137,8 +138,23 @@ class DockerImage(enum.Enum):
   """Common docker images."""
 
   XPK_JAX_TEST = "gcr.io/cloud-ml-auto-solutions/xpk_jax_test:latest"
-  PYTORCH_NIGHTLY = f"us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:nightly_3.10_tpuvm_{datetime.datetime.today().strftime('%Y%m%d')}"
-  MAXTEXT_TPU_JAX_STABLE = f"gcr.io/tpu-prod-env-multipod/maxtext_jax_stable:{datetime.datetime.today().strftime('%Y-%m-%d')}"
-  MAXTEXT_TPU_JAX_NIGHTLY = f"gcr.io/tpu-prod-env-multipod/maxtext_jax_nightly:{datetime.datetime.today().strftime('%Y-%m-%d')}"
-  MAXTEXT_GPU_JAX_STABLE = f"gcr.io/tpu-prod-env-multipod/maxtext_gpu_jax_stable:{datetime.datetime.today().strftime('%Y-%m-%d')}"
-  MAXTEXT_GPU_JAX_NIGHTLY = f"gcr.io/tpu-prod-env-multipod/maxtext_gpu_jax_nightly:{datetime.datetime.today().strftime('%Y-%m-%d')}"
+  PYTORCH_NIGHTLY = (
+      "us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/"
+      f"xla:nightly_3.10_tpuvm_{datetime.datetime.today().strftime('%Y%m%d')}"
+  )
+  MAXTEXT_TPU_JAX_STABLE = (
+      "gcr.io/tpu-prod-env-multipod/maxtext_jax_stable:"
+      f"{datetime.datetime.today().strftime('%Y-%m-%d')}"
+  )
+  MAXTEXT_TPU_JAX_NIGHTLY = (
+      "gcr.io/tpu-prod-env-multipod/maxtext_jax_nightly:"
+      f"{datetime.datetime.today().strftime('%Y-%m-%d')}"
+  )
+  MAXTEXT_GPU_JAX_STABLE = (
+      "gcr.io/tpu-prod-env-multipod/maxtext_gpu_jax_stable:"
+      f"{datetime.datetime.today().strftime('%Y-%m-%d')}"
+  )
+  MAXTEXT_GPU_JAX_NIGHTLY = (
+      "gcr.io/tpu-prod-env-multipod/maxtext_gpu_jax_nightly:"
+      f"{datetime.datetime.today().strftime('%Y-%m-%d')}"
+  )
diff --git a/scripts/code-style.sh b/scripts/code-style.sh
index 576fff94..940f80e8 100755
--- a/scripts/code-style.sh
+++ b/scripts/code-style.sh
@@ -26,7 +26,7 @@ done
 
 for folder in "${FOLDERS_TO_FORMAT[@]}"
 do
-  pylint "./$folder" --fail-under=9
+  pylint "./$folder" --fail-under=9.6
 done
 
 echo "Successfully clean up all codes."
diff --git a/xlml/apis/gcp_config.py b/xlml/apis/gcp_config.py
index a79c12c6..501ffcf6 100644
--- a/xlml/apis/gcp_config.py
+++ b/xlml/apis/gcp_config.py
@@ -15,8 +15,9 @@
 """Config file for Google Cloud Project (GCP)."""
 
 import dataclasses
-from xlml.apis import metric_config
+
 from dags.vm_resource import Project
+from xlml.apis import metric_config
 
 
 @dataclasses.dataclass
diff --git a/xlml/apis/metric_config.py b/xlml/apis/metric_config.py
index eb13885b..6fe27e30 100644
--- a/xlml/apis/metric_config.py
+++ b/xlml/apis/metric_config.py
@@ -43,7 +43,7 @@ class SshEnvVars(enum.Enum):
 
 @dataclasses.dataclass
 class JSONLinesConfig:
-  """This is a class to set up JSON Lines config.
+  """A class to set up JSON Lines config.
 
   Attributes:
     file_location: The locatioin of the file in GCS. When
@@ -55,7 +55,7 @@ class SummaryConfig:
 
 @dataclasses.dataclass
 class SummaryConfig:
-  """This is a class to set up TensorBoard summary config.
+  """A class to set up TensorBoard summary config.
 
   Attributes:
     file_location: The locatioin of the file in GCS. When
@@ -79,7 +79,7 @@ class ProfileConfig:
 
 @dataclasses.dataclass
 class ProfileConfig:
-  """This is a class to set up profile config.
+  """A class to set up profile config.
 
   Attributes:
     file_locations: The locatioin of the file in GCS. When
@@ -93,8 +93,7 @@ class MetricConfig:
 
 @dataclasses.dataclass
 class MetricConfig:
-  """This is a class to set up config of Benchmark metric,
-  dimension, and profile.
+  """A class to set up config of Benchmark metric, dimension, and profile.
 
   Attributes:
     json_lines: The config for JSON Lines input.
diff --git a/xlml/apis/task.py b/xlml/apis/task.py
index 14b73cc8..6354c3a3 100644
--- a/xlml/apis/task.py
+++ b/xlml/apis/task.py
@@ -18,12 +18,12 @@
 import dataclasses
 import datetime
 import shlex
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Optional, Tuple, Union
 import airflow
 from airflow.models.taskmixin import DAGNode
 from airflow.utils.task_group import TaskGroup
 from xlml.apis import gcp_config, metric_config, test_config
-from xlml.utils import gpu, metric, name_format, ssh, tpu, xpk, gke, startup_script
+from xlml.utils import gpu, metric, name_format, ssh, tpu, xpk, gke
 
 
 class BaseTask(abc.ABC):
@@ -47,6 +47,7 @@ class TpuQueuedResourceTask(BaseTask):
     task_test_config: Test configs to run on this TPU.
     task_gcp_config: Runtime TPU creation parameters.
     task_metric_config: Metric configs to process metrics.
+    tpu_create_timeout: Time to provision the machine.
     tpu_name_env_var: The flag to define if set up env variable for tpu name.
     all_workers: The flag to define if run commands on all workers or worker
       0 only.
@@ -71,8 +72,9 @@ def run(self) -> DAGNode:
         group_id=self.task_test_config.benchmark_id, prefix_group_id=True
     ) as group:
       provision, queued_resource, ssh_keys, gcs_location = self.provision()
-      # If you didn't set `MetricConfig.use_runtime_generated_gcs_folder` value in the
-      # test config script then `gcs_location` will take no effect.
+      # If you didn't set `MetricConfig.use_runtime_generated_gcs_folder`
+      # value in the test config script then `gcs_location` will take
+      # no effect.
       if (
           self.task_metric_config
           and self.task_metric_config.use_runtime_generated_gcs_folder
diff --git a/xlml/apis/test_config.py b/xlml/apis/test_config.py
index 8c3f6b67..aec85de0 100644
--- a/xlml/apis/test_config.py
+++ b/xlml/apis/test_config.py
@@ -44,13 +44,14 @@ def __init__(self, accelerator, task_owner=None, test_name):
 """
 
 import abc
-import attrs
-from dags.vm_resource import TpuVersion
 import json
 import os
 import shlex
 from typing import Any, Generic, Iterable, List, Optional, TypeVar
 
+import attrs
+from dags.vm_resource import TpuVersion
+
 
 class Accelerator(abc.ABC):
   """Represents an ML accelerator."""
@@ -124,6 +125,7 @@ class TestConfig(abc.ABC, Generic[A]):
     accelerator: Accelerator type required for this test.
     time_out_in_min: Test timeout in minutes.
     task_owner: Task owner username or link.
+    gcs_subfolder: Subfolder name for default GCS bucket.
   """
 
   accelerator: A
diff --git a/xlml/utils/bigquery.py b/xlml/utils/bigquery.py
index f8d2f83a..cde288c8 100644
--- a/xlml/utils/bigquery.py
+++ b/xlml/utils/bigquery.py
@@ -20,11 +20,11 @@
 import enum
 import math
 from typing import Iterable, Optional
+
 from absl import logging
-from xlml.apis import metric_config
 import google.auth
 from google.cloud import bigquery
-
+from xlml.apis import metric_config
 
 BENCHMARK_BQ_JOB_TABLE_NAME = "job_history"
 BENCHMARK_BQ_METRIC_TABLE_NAME = "metric_history"
@@ -73,6 +73,7 @@ class BigQueryMetricClient:
   Attributes:
     project: The project name for database.
     database: The database name for BigQuery.
+    client: The client for BigQuery Metric.
   """
 
   def __init__(
diff --git a/xlml/utils/bigquery_test.py b/xlml/utils/bigquery_test.py
index 41b143b0..03924453 100644
--- a/xlml/utils/bigquery_test.py
+++ b/xlml/utils/bigquery_test.py
@@ -71,6 +71,7 @@ def test_is_valid_metric(self, x: float, expected_value: bool):
       bigquery.Client, "insert_rows", return_value=["there is an error"]
   )
   def test_insert_failure(self, default, get_table, insert_rows):
+    del default, get_table, insert_rows
     bq_metric = test_bigquery.BigQueryMetricClient()
     self.assertRaises(RuntimeError, bq_metric.insert, self.test_runs)
 
@@ -80,6 +81,7 @@ def test_insert_failure(self, default, get_table, insert_rows):
   @mock.patch.object(bigquery.Client, "get_table", return_value="mock_table")
   @mock.patch.object(bigquery.Client, "insert_rows", return_value=[])
   def test_insert_success(self, default, get_table, insert_rows):
+    del default, get_table, insert_rows
     bq_metric = test_bigquery.BigQueryMetricClient()
     bq_metric.insert(self.test_runs)
 
diff --git a/xlml/utils/gke.py b/xlml/utils/gke.py
index c77d0a91..a35fd0b3 100644
--- a/xlml/utils/gke.py
+++ b/xlml/utils/gke.py
@@ -13,6 +13,8 @@
 
 from xlml.apis import gcp_config
 
+"""Utilities for GKE."""
+
 
 def get_authenticated_client(
     project_name: str, region: str, cluster_name: str
diff --git a/xlml/utils/gpu.py b/xlml/utils/gpu.py
index ff3eb6a5..b6c7abbd 100644
--- a/xlml/utils/gpu.py
+++ b/xlml/utils/gpu.py
@@ -280,7 +280,7 @@ def wait_for_resource_creation(operation_name: airflow.XComArg):
         operation.http_error_message
     )
   elif operation.warnings:
-    logging.warning(f"Warnings during resource creation:\n")
+    logging.warning("Warnings during resource creation:\n")
     for warning in operation.warnings:
       logging.warning(f" - {warning.code}: {warning.message}")
   return True
@@ -388,7 +388,7 @@ def wait_for_resource_deletion(operation_name: airflow.XComArg):
         operation.http_error_message
     )
   elif operation.warnings:
-    logging.warning(f"Warnings during resource deletion:\n")
+    logging.warning("Warnings during resource deletion:\n")
     for warning in operation.warnings:
       logging.warning(f" - {warning.code}: {warning.message}")
   return True
diff --git a/xlml/utils/metric_test.py b/xlml/utils/metric_test.py
index c93f0d94..7ec4f99b 100644
--- a/xlml/utils/metric_test.py
+++ b/xlml/utils/metric_test.py
@@ -29,9 +29,6 @@
 
 from dags.vm_resource import TpuVersion, RuntimeVersion
 
-"""Tests for Benchmark metric.py."""
-
-
 class BenchmarkMetricTest(parameterized.TestCase, absltest.TestCase):
 
   def get_tempdir(self):
@@ -152,7 +149,7 @@ def test_aggregate_metrics(
     self.assertAlmostEqual(actual_value, expected_value)
 
   @mock.patch("xlml.utils.metric.download_object_from_gcs")
-  def test_process_json_lines(self, download_object_from_gcs):
+  def test_process_json_lines(self, _):
     path = "/tmp/ml-auto-solutions-metrics.jsonl"
     test_run1 = {
         "metrics": {"accuracy": 0.95, "MFU": 0.50},
@@ -273,10 +270,10 @@
             "COMPOSER_LOCATION": "test_location",
             "COMPOSER_ENVIRONMENT": "test_env",
         },
-    ) as mock_variable:
+    ) as _:
       with mock.patch.object(
           composer, "get_airflow_url", return_value="http://airflow"
-      ) as mock_object:
+      ) as _:
         raw_meta = [
             [
                 bigquery.MetadataHistoryRow(
@@ -313,7 +310,11 @@
             bigquery.MetadataHistoryRow(
                 job_uuid=uuid,
                 metadata_key="airflow_dag_run_link",
-                metadata_value="http://airflow/dags/benchmark_test/grid?dag_run_id=manual__2023-08-07T21%3A03%3A49.181263%2B00%3A00&task_id=post_process",
+                metadata_value=(
+                    "http://airflow/dags/benchmark_test/grid?"
+                    "dag_run_id=manual__2023-08-07T21%3A03%3A49."
+                    "181263%2B00%3A00&task_id=post_process"
+                ),
             )
         )
 
diff --git a/xlml/utils/tpu.py b/xlml/utils/tpu.py
index d9ed7db1..927dc8a1 100644
--- a/xlml/utils/tpu.py
+++ b/xlml/utils/tpu.py
@@ -166,7 +166,7 @@ def create_queued_resource_request(
       queued_resource=queued_resource,
   )
   response = qr_operation.result()
-  logging.info('Create QR response: {}'.format(response))
+  logging.info(f'Create QR response: {response}')
   # TODO(wcromar): do anything about failures
 
   return response.name
@@ -248,7 +248,7 @@ def delete_tpu_nodes_request(qualified_name: str):
   for node in qr.tpu.node_spec:
     try:
       op = client.delete_node(name=f'{node.parent}/nodes/{node.node_id}')
-      logging.info('Delete node state: {}'.format(op))
+      logging.info(f'Delete node state: {op}')
     except google.api_core.exceptions.NotFound:
       logging.info(f'{node.node_id} is already deleted')
 
diff --git a/xlml/utils/xpk.py b/xlml/utils/xpk.py
index 94c2f674..4fd03ff6 100644
--- a/xlml/utils/xpk.py
+++ b/xlml/utils/xpk.py
@@ -21,9 +21,7 @@
 from airflow.decorators import task
 from airflow.exceptions import AirflowFailException
 from airflow.hooks.subprocess import SubprocessHook
-from datetime import timedelta
 from kubernetes import client as k8s_client
-from kubernetes.client import models as k8s_models
 from xlml.apis import metric_config
 from xlml.utils import gke
 from dags.vm_resource import GpuVersion
@@ -136,7 +134,7 @@ def wait_for_workload_completion(
     return False
 
   if any(pod.status.phase in ["Pending", "Running"] for pod in pods.items):
-    logging.info(f"At least one pod has yet to complete")
+    logging.info("At least one pod has yet to complete.")
     return False
 
   try: