Skip to content

Commit

Permalink
Increase fail score for pylint (#240)
Browse files Browse the repository at this point in the history
  • Loading branch information
RissyRan authored Apr 4, 2024
1 parent 283d0a5 commit f192fb3
Show file tree
Hide file tree
Showing 14 changed files with 61 additions and 36 deletions.
1 change: 1 addition & 0 deletions .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ redefining-builtins-modules=six,six.moves,past.builtins,future.builtins,functool
# Logging modules to check that the string format arguments are in logging
# function parameter format
logging-modules=logging,absl.logging,tensorflow.io.logging
disable=logging-fstring-interpolation


[SIMILARITIES]
Expand Down
30 changes: 23 additions & 7 deletions dags/vm_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@

"""The file for common projects, zone, and runtime versions."""

import enum
import datetime
import enum


V5_NETWORKS_PREFIX = "projects/tpu-prod-env-automated"
Expand Down Expand Up @@ -55,7 +55,8 @@ class Zone(enum.Enum):
US_CENTRAL1_B = "us-central1-b"
# reserved v4-8 & v4-32 in cloud-ml-auto-solutions
US_CENTRAL2_B = "us-central2-b"
# reserved/on-demand v2-8 in cloud-ml-auto-solutions & reserved h100 in supercomputer-testing
# reserved/on-demand v2-8 in cloud-ml-auto-solutions
# & reserved h100 in supercomputer-testing
US_CENTRAL1_C = "us-central1-c"
# committed resource for A100
US_CENTRAL1_F = "us-central1-f"
Expand Down Expand Up @@ -137,8 +138,23 @@ class DockerImage(enum.Enum):
"""Common docker images."""

XPK_JAX_TEST = "gcr.io/cloud-ml-auto-solutions/xpk_jax_test:latest"
PYTORCH_NIGHTLY = f"us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:nightly_3.10_tpuvm_{datetime.datetime.today().strftime('%Y%m%d')}"
MAXTEXT_TPU_JAX_STABLE = f"gcr.io/tpu-prod-env-multipod/maxtext_jax_stable:{datetime.datetime.today().strftime('%Y-%m-%d')}"
MAXTEXT_TPU_JAX_NIGHTLY = f"gcr.io/tpu-prod-env-multipod/maxtext_jax_nightly:{datetime.datetime.today().strftime('%Y-%m-%d')}"
MAXTEXT_GPU_JAX_STABLE = f"gcr.io/tpu-prod-env-multipod/maxtext_gpu_jax_stable:{datetime.datetime.today().strftime('%Y-%m-%d')}"
MAXTEXT_GPU_JAX_NIGHTLY = f"gcr.io/tpu-prod-env-multipod/maxtext_gpu_jax_nightly:{datetime.datetime.today().strftime('%Y-%m-%d')}"
PYTORCH_NIGHTLY = (
"us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/"
f"xla:nightly_3.10_tpuvm_{datetime.datetime.today().strftime('%Y%m%d')}"
)
MAXTEXT_TPU_JAX_STABLE = (
"gcr.io/tpu-prod-env-multipod/maxtext_jax_stable:"
f"{datetime.datetime.today().strftime('%Y-%m-%d')}"
)
MAXTEXT_TPU_JAX_NIGHTLY = (
"gcr.io/tpu-prod-env-multipod/maxtext_jax_nightly:"
f"{datetime.datetime.today().strftime('%Y-%m-%d')}"
)
MAXTEXT_GPU_JAX_STABLE = (
"gcr.io/tpu-prod-env-multipod/maxtext_gpu_jax_stable:"
f"{datetime.datetime.today().strftime('%Y-%m-%d')}"
)
MAXTEXT_GPU_JAX_NIGHTLY = (
"gcr.io/tpu-prod-env-multipod/maxtext_gpu_jax_nightly:"
f"{datetime.datetime.today().strftime('%Y-%m-%d')}"
)
2 changes: 1 addition & 1 deletion scripts/code-style.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ done

for folder in "${FOLDERS_TO_FORMAT[@]}"
do
pylint "./$folder" --fail-under=9
pylint "./$folder" --fail-under=9.6
done

echo "Successfully clean up all codes."
3 changes: 2 additions & 1 deletion xlml/apis/gcp_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
"""Config file for Google Cloud Project (GCP)."""

import dataclasses
from xlml.apis import metric_config

from dags.vm_resource import Project
from xlml.apis import metric_config


@dataclasses.dataclass
Expand Down
9 changes: 4 additions & 5 deletions xlml/apis/metric_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class SshEnvVars(enum.Enum):

@dataclasses.dataclass
class JSONLinesConfig:
"""This is a class to set up JSON Lines config.
"""A class to set up JSON Lines config.
Attributes:
file_location: The location of the file in GCS. When
Expand All @@ -55,7 +55,7 @@ class JSONLinesConfig:

@dataclasses.dataclass
class SummaryConfig:
"""This is a class to set up TensorBoard summary config.
"""A class to set up TensorBoard summary config.
Attributes:
file_location: The location of the file in GCS. When
Expand All @@ -79,7 +79,7 @@ class SummaryConfig:

@dataclasses.dataclass
class ProfileConfig:
"""This is a class to set up profile config.
"""A class to set up profile config.
Attributes:
file_locations: The location of the file in GCS. When
Expand All @@ -93,8 +93,7 @@ class ProfileConfig:

@dataclasses.dataclass
class MetricConfig:
"""This is a class to set up config of Benchmark metric,
dimension, and profile.
"""A class to set up config of Benchmark metric, dimension, and profile.
Attributes:
json_lines: The config for JSON Lines input.
Expand Down
10 changes: 6 additions & 4 deletions xlml/apis/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@
import dataclasses
import datetime
import shlex
from typing import Any, Dict, Optional, Tuple, Union
from typing import Optional, Tuple, Union
import airflow
from airflow.models.taskmixin import DAGNode
from airflow.utils.task_group import TaskGroup
from xlml.apis import gcp_config, metric_config, test_config
from xlml.utils import gpu, metric, name_format, ssh, tpu, xpk, gke, startup_script
from xlml.utils import gpu, metric, name_format, ssh, tpu, xpk, gke


class BaseTask(abc.ABC):
Expand All @@ -47,6 +47,7 @@ class TpuQueuedResourceTask(BaseTask):
task_test_config: Test configs to run on this TPU.
task_gcp_config: Runtime TPU creation parameters.
task_metric_config: Metric configs to process metrics.
tpu_create_timeout: Time to provision the machine.
tpu_name_env_var: The flag to define if set up env variable for tpu name.
all_workers: The flag to define if run commands on all workers or worker 0
only.
Expand All @@ -71,8 +72,9 @@ def run(self) -> DAGNode:
group_id=self.task_test_config.benchmark_id, prefix_group_id=True
) as group:
provision, queued_resource, ssh_keys, gcs_location = self.provision()
# If you didn't set `MetricConfig.use_runtime_generated_gcs_folder` value in the
# test config script then `gcs_location` will take no effect.
# If you didn't set `MetricConfig.use_runtime_generated_gcs_folder`
# value in the test config script then `gcs_location` will take
# no effect.
if (
self.task_metric_config
and self.task_metric_config.use_runtime_generated_gcs_folder
Expand Down
6 changes: 4 additions & 2 deletions xlml/apis/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,14 @@ def __init__(self, accelerator, task_owner=None, test_name):
"""

import abc
import attrs
from dags.vm_resource import TpuVersion
import json
import os
import shlex
from typing import Any, Generic, Iterable, List, Optional, TypeVar

import attrs
from dags.vm_resource import TpuVersion


class Accelerator(abc.ABC):
"""Represents an ML accelerator."""
Expand Down Expand Up @@ -124,6 +125,7 @@ class TestConfig(abc.ABC, Generic[A]):
accelerator: Accelerator type required for this test.
time_out_in_min: Test timeout in minutes.
task_owner: Task owner username or link.
gcs_subfolder: Subfolder name for default GCS bucket.
"""

accelerator: A
Expand Down
5 changes: 3 additions & 2 deletions xlml/utils/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@
import enum
import math
from typing import Iterable, Optional

from absl import logging
from xlml.apis import metric_config
import google.auth
from google.cloud import bigquery

from xlml.apis import metric_config

BENCHMARK_BQ_JOB_TABLE_NAME = "job_history"
BENCHMARK_BQ_METRIC_TABLE_NAME = "metric_history"
Expand Down Expand Up @@ -73,6 +73,7 @@ class BigQueryMetricClient:
Attributes:
project: The project name for database.
database: The database name for BigQuery.
client: The client for BigQuery Metric.
"""

def __init__(
Expand Down
2 changes: 2 additions & 0 deletions xlml/utils/bigquery_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def test_is_valid_metric(self, x: float, expected_value: bool):
bigquery.Client, "insert_rows", return_value=["there is an error"]
)
def test_insert_failure(self, default, get_table, insert_rows):
del default, get_table, insert_rows
bq_metric = test_bigquery.BigQueryMetricClient()
self.assertRaises(RuntimeError, bq_metric.insert, self.test_runs)

Expand All @@ -80,6 +81,7 @@ def test_insert_failure(self, default, get_table, insert_rows):
@mock.patch.object(bigquery.Client, "get_table", return_value="mock_table")
@mock.patch.object(bigquery.Client, "insert_rows", return_value=[])
def test_insert_success(self, default, get_table, insert_rows):
del default, get_table, insert_rows
bq_metric = test_bigquery.BigQueryMetricClient()
bq_metric.insert(self.test_runs)

Expand Down
2 changes: 2 additions & 0 deletions xlml/utils/gke.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

from xlml.apis import gcp_config

"""Utilities for GKE."""


def get_authenticated_client(
project_name: str, region: str, cluster_name: str
Expand Down
4 changes: 2 additions & 2 deletions xlml/utils/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ def wait_for_resource_creation(operation_name: airflow.XComArg):
operation.http_error_message
)
elif operation.warnings:
logging.warning(f"Warnings during resource creation:\n")
logging.warning("Warnings during resource creation:\n")
for warning in operation.warnings:
logging.warning(f" - {warning.code}: {warning.message}")
return True
Expand Down Expand Up @@ -388,7 +388,7 @@ def wait_for_resource_deletion(operation_name: airflow.XComArg):
operation.http_error_message
)
elif operation.warnings:
logging.warning(f"Warnings during resource deletion:\n")
logging.warning("Warnings during resource deletion:\n")
for warning in operation.warnings:
logging.warning(f" - {warning.code}: {warning.message}")
return True
Expand Down
15 changes: 8 additions & 7 deletions xlml/utils/metric_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,6 @@
from dags.vm_resource import TpuVersion, RuntimeVersion


"""Tests for Benchmark metric.py."""


class BenchmarkMetricTest(parameterized.TestCase, absltest.TestCase):

def get_tempdir(self):
Expand Down Expand Up @@ -152,7 +149,7 @@ def test_aggregate_metrics(
self.assertAlmostEqual(actual_value, expected_value)

@mock.patch("xlml.utils.metric.download_object_from_gcs")
def test_process_json_lines(self, download_object_from_gcs):
def test_process_json_lines(self, _):
path = "/tmp/ml-auto-solutions-metrics.jsonl"
test_run1 = {
"metrics": {"accuracy": 0.95, "MFU": 0.50},
Expand Down Expand Up @@ -273,10 +270,10 @@ def test_add_airflow_metadata(self):
"COMPOSER_LOCATION": "test_location",
"COMPOSER_ENVIRONMENT": "test_env",
},
) as mock_variable:
) as _:
with mock.patch.object(
composer, "get_airflow_url", return_value="http://airflow"
) as mock_object:
) as _:
raw_meta = [
[
bigquery.MetadataHistoryRow(
Expand Down Expand Up @@ -313,7 +310,11 @@ def test_add_airflow_metadata(self):
bigquery.MetadataHistoryRow(
job_uuid=uuid,
metadata_key="airflow_dag_run_link",
metadata_value="http://airflow/dags/benchmark_test/grid?dag_run_id=manual__2023-08-07T21%3A03%3A49.181263%2B00%3A00&task_id=post_process",
metadata_value=(
"http://airflow/dags/benchmark_test/grid?"
"dag_run_id=manual__2023-08-07T21%3A03%3A49."
"181263%2B00%3A00&task_id=post_process"
),
)
)

Expand Down
4 changes: 2 additions & 2 deletions xlml/utils/tpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def create_queued_resource_request(
queued_resource=queued_resource,
)
response = qr_operation.result()
logging.info('Create QR response: {}'.format(response))
logging.info(f'Create QR response: {response}')
# TODO(wcromar): do anything about failures

return response.name
Expand Down Expand Up @@ -248,7 +248,7 @@ def delete_tpu_nodes_request(qualified_name: str):
for node in qr.tpu.node_spec:
try:
op = client.delete_node(name=f'{node.parent}/nodes/{node.node_id}')
logging.info('Delete node state: {}'.format(op))
logging.info(f'Delete node state: {op}')
except google.api_core.exceptions.NotFound:
logging.info(f'{node.node_id} is already deleted')

Expand Down
4 changes: 1 addition & 3 deletions xlml/utils/xpk.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@
from airflow.decorators import task
from airflow.exceptions import AirflowFailException
from airflow.hooks.subprocess import SubprocessHook
from datetime import timedelta
from kubernetes import client as k8s_client
from kubernetes.client import models as k8s_models
from xlml.apis import metric_config
from xlml.utils import gke
from dags.vm_resource import GpuVersion
Expand Down Expand Up @@ -136,7 +134,7 @@ def wait_for_workload_completion(
return False

if any(pod.status.phase in ["Pending", "Running"] for pod in pods.items):
logging.info(f"At least one pod has yet to complete")
logging.info("At least one pod has yet to complete.")
return False

try:
Expand Down

0 comments on commit f192fb3

Please sign in to comment.