Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Improvement to tlo batch-list subcommand #1041

Merged
merged 10 commits into from
Aug 1, 2023
79 changes: 52 additions & 27 deletions src/tlo/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import click
import dateutil.parser
import pandas as pd
from azure import batch
from azure.batch import batch_auth
from azure.batch import models as batch_models
Expand Down Expand Up @@ -362,45 +363,69 @@ def batch_job(ctx, job_id, raw, show_tasks):
@click.option("--completed", "status", flag_value="completed", default=False, help="Only display completed jobs")
@click.option("--active", "status", flag_value="active", default=False, help="Only display active jobs")
@click.option("-n", default=5, type=int, help="Maximum number of jobs to list (default is 5)")
@click.option("--username", type=str, hidden=True)
@click.pass_context
def batch_list(ctx, status, n, find):
def batch_list(ctx, status, n, find, username):
"""List and find running and completed jobs."""
print(">Querying batch system\r", end="")
print("Querying Batch...")
config = load_config(ctx.obj["config_file"])

if username is None:
username = config["DEFAULT"]["USERNAME"]

batch_client = get_batch_client(
config["BATCH"]["NAME"],
config["BATCH"]["KEY"],
config["BATCH"]["URL"]
)

# get list of all batch jobs
jobs = batch_client.job.list(
# create client to connect to file share
share_client = ShareClient.from_connection_string(config['STORAGE']['CONNECTION_STRING'],
config['STORAGE']['FILESHARE'])

# get list of all directories in user_directory
directories = list(share_client.list_directories_and_files(f"{username}/"))

if len(directories) == 0:
print("No jobs found.")
return

# convert directories to set
directories = set([directory["name"] for directory in directories])

# get all jobs in batch system
jobs_list = list(batch_client.job.list(
job_list_options=batch_models.JobListOptions(
expand='stats'
)
)
count = 0
for job in jobs:
jad = job.as_dict()
print_job = False
if (status is None or
("completed" in status and jad["state"] == "completed") or
("active" in status and jad["state"] == "active")):
if find is not None:
if find in jad["id"]:
print_job = True
else:
print_job = True
))

if print_job:
print_basic_job_details(jad)
if "stats" in jad:
print(f"{'Succeeded tasks'.ljust(JOB_LABEL_PADDING)}: {jad['stats']['num_succeeded_tasks']}")
print(f"{'Failed tasks'.ljust(JOB_LABEL_PADDING)}: {jad['stats']['num_failed_tasks']}")
print()
count += 1
if count == n:
break
# create a dataframe of the jobs, using the job.as_dict() record
# filter the list of jobs by those ids in the directories
jobs_list = [job.as_dict() for job in jobs_list if job.id in directories]
jobs: pd.DataFrame = pd.DataFrame(jobs_list)
jobs = jobs[["id", "creation_time", "state"]]

# get subset where id contains the find string
if find is not None:
jobs = jobs[jobs["id"].str.contains(find)]

# filter by status
if status is not None:
jobs = jobs[jobs["state"] == status]

if len(jobs) == 0:
print("No jobs found.")
return

# sort by creation time
jobs = jobs.sort_values("creation_time", ascending=False)

# get the first n rows
jobs = jobs.head(n)

# print the dataframe
print(jobs.to_string(index=False))


def print_basic_job_details(job: dict):
Expand Down Expand Up @@ -493,7 +518,7 @@ def load_config(config_file):


def load_server_config(kv_uri, tenant_id) -> Dict[str, Dict]:
"""Retrieve the server configuration for running Batch using the user"s Azure credentials
"""Retrieve the server configuration for running Batch using the user's Azure credentials

Allows user to login using credentials from Azure CLI or interactive browser.

Expand Down