Commit

Merge branch 'compute-tracking' into main
Kory Stiger committed Jun 7, 2021
2 parents 7ee4bfc + f0d2c7b commit c4cf9ef
Showing 3 changed files with 106 additions and 53 deletions.
99 changes: 55 additions & 44 deletions cli/cli.py
@@ -7,7 +7,7 @@

from cli.config import initialize_config, read_config, write_config, get_endpoint
from cli.loader import Loader
-from cli.utils import parse_args, resolve_sweep, use_project
+from cli.utils import parse_args, resolve_sweep, use_project, print_list_as_columns
from zpy.files import read_json, to_pathlib_path

SMALL_WIDTH = 12
@@ -28,7 +28,7 @@ def cli():


@cli.command("help")
-def help():
+def cli_help():
"""display help
This will display help in order to provide users with more information
@@ -70,13 +70,13 @@ def set_env(env):
click.echo("zpy login to fetch token")


-@cli.group()
-def project():
+@cli.group("project")
+def cli_project():
"""Manage global project workspace."""
pass


-@project.command("set")
+@cli_project.command("set")
@click.argument("project_uuid", type=click.UUID)
def set_project(project_uuid):
"""Set global PROJECT uuid."""
@@ -88,7 +88,7 @@ def set_project(project_uuid):
click.echo(f" {old_project_uuid} -> {config['PROJECT']}")


-@project.command("clear")
+@cli_project.command("clear")
def clear_project():
"""Clear global PROJECT uuid."""
config = read_config()
@@ -124,7 +124,7 @@ def login(username, password):


@cli.command("config")
-def config():
+def cli_config():
"""display config
Display current configuration file to developer.
@@ -147,16 +147,16 @@ def version():
# ------- LIST


-@cli.group()
-def list():
+@cli.group("list")
+def cli_list():
"""List objects.
List group is used for list commands on backend objects.
"""
pass


-@list.command("datasets")
+@cli_list.command("datasets")
@click.argument("filters", nargs=-1)
@use_project()
def list_datasets(filters, project=None):
@@ -206,7 +206,7 @@
)


-@list.command("sims")
+@cli_list.command("sims")
@click.argument("filters", nargs=-1)
@use_project()
def list_sims(filters, project=None):
@@ -258,7 +258,7 @@
)


-@list.command("projects")
+@cli_list.command("projects")
@click.argument("filters", nargs=-1)
def list_projects(filters):
"""list projects
Expand All @@ -270,7 +270,7 @@ def list_projects(filters):
try:
filters = parse_args(filters)
except Exception:
-click.secho("Failed to parse filters: {args}", fg="yellow", err=True)
+click.secho(f"Failed to parse filters: {filters}", fg="yellow", err=True)
return

try:
@@ -301,7 +301,7 @@ def list_projects(filters):
)


-@list.command("accounts")
+@cli_list.command("accounts")
@click.argument("filters", nargs=-1)
def list_accounts(filters):
"""list accounts
Expand All @@ -313,7 +313,7 @@ def list_accounts(filters):
try:
filters = parse_args(filters)
except Exception:
-click.secho("Failed to parse filters: {args}", fg="yellow", err=True)
+click.secho(f"Failed to parse filters: {filters}", fg="yellow", err=True)
return

try:
@@ -344,7 +344,7 @@ def list_accounts(filters):
)


-@list.command("jobs")
+@cli_list.command("jobs")
@click.argument("filters", nargs=-1)
@use_project()
def list_jobs(filters, project=None):
Expand All @@ -360,7 +360,7 @@ def list_jobs(filters, project=None):
if project:
filters["project"] = project
except Exception:
-click.secho("Failed to parse filters: {args}", fg="yellow", err=True)
+click.secho(f"Failed to parse filters: {filters}", fg="yellow", err=True)
return

try:
@@ -390,7 +390,7 @@ def list_jobs(filters, project=None):
# ------- GET


-@cli.group()
+@cli.group("get")
def get():
"""get object
@@ -460,7 +460,7 @@ def get_sim(name, path):
# ------- UPLOAD


-@cli.group()
+@cli.group("upload")
def upload():
"""upload object
@@ -528,7 +528,7 @@ def upload_dataset(name, path, project=None):
# ------- CREATE


-@cli.group()
+@cli.group("create")
def create():
"""create object
@@ -578,7 +578,7 @@ def create_dataset(name, sim, args, project=None):
try:
dataset_config = parse_args(args)
except Exception:
-click.secho("Failed to parse args: {args}", fg="yellow", err=True)
+click.secho(f"Failed to parse args: {args}", fg="yellow", err=True)
return
try:
create_generated_dataset(name, sim, parse_args(args), project)
@@ -619,7 +619,7 @@ def create_sweep(name, sim, number, args, project=None):
try:
dataset_config = parse_args(args)
except Exception:
-click.secho("Failed to parse args: {args}", fg="yellow", err=True)
+click.secho(f"Failed to parse args: {args}", fg="yellow", err=True)
return
for i in range(int(number)):
dataset_name = f"{name} seed{i}"
@@ -642,48 +642,59 @@ def create_sweep(name, sim, number, args, project=None):

@create.command("job")
@click.argument("name")
-@click.argument("operation")
-@click.option("filters", "-f", multiple=True)
+@click.argument("operation", type=click.Choice(["package", "tvt", "train"]))
+@click.option(
+"filters",
+"-f",
+multiple=True,
+help="Key/value pairs separated by spaces. Passed as query params in the API call to filter data sets.",
+)
@click.option(
"configfile",
"--configfile",
type=click.Path(exists=True, dir_okay=False, resolve_path=True),
help="Path to json file",
)
@click.option(
"sweepfile",
"--sweepfile",
type=click.Path(exists=True, dir_okay=False, resolve_path=True),
help="Path to json file",
)
@use_project(required=True)
def create_job(name, operation, filters, configfile, sweepfile, project=None):
"""create job
-Create a job object in backend that will trigger an operation on
-datasets filtered by the filters. Requires PROJECT set via `zpy project`.
-Args:
-name (str): name of new job
-operation (str): name of operation to run on datasets
-filters (str): string filters for dataset names to run job on
-configfile (str): json configuration for the job
-sweepfile (str): sweep json to launch a suite of jobs
-project (str): project uuid
+Create a job called NAME within PROJECT to perform OPERATION on a group of datasets defined by the FILTERS
+provided by -f. Requires PROJECT set via `zpy project`.
"""
from cli.datasets import filter_datasets
from cli.jobs import create_new_job

-datasets = []
+filtered_datasets = []
for dfilter in filters:
try:
with Loader(f"Filtering datasets by '{dfilter}'..."):
-filtered_datasets = filter_datasets(dfilter, project)
-filtered_datasets_names = [*filtered_datasets.keys()]
-click.echo(
-f"Filtered datasets by filter '{dfilter}':\n{filtered_datasets_names}"
-)
-datasets.append(filtered_datasets.values())
+datasets_by_type = filter_datasets(dfilter, project)
+
+for [dataset_type, datasets] in datasets_by_type.items():
+count = len(datasets)
+click.secho(f"Found {count} of type<{dataset_type}>")
+
+if count == 0:
+continue
+
+dataset_names = list(datasets.values())
+print_list_as_columns(dataset_names)
+
+filtered_datasets_ids = [
+data_set_id
+for data_sets in datasets_by_type.values()
+for data_set_id in data_sets.keys()
+]
+filtered_datasets.extend(filtered_datasets_ids)
except requests.exceptions.HTTPError as e:
-click.secho(f"Failed to filter datsets {e}", fg="red", err=True)
+click.secho(f"Failed to filter datasets {e}", fg="red", err=True)

job_configs = []
if configfile:
@@ -709,7 +709,7 @@ def create_job(name, operation, filters, configfile, sweepfile, project=None):
for i, config in enumerate(job_configs):
job_name = name if i == 0 else f"{name} {i}"
try:
-create_new_job(job_name, operation, config, datasets, project)
+create_new_job(job_name, operation, config, filtered_datasets, project)
click.secho(
f"Created {operation} job '{job_name}' with config {config}", fg="green"
)
@@ -724,7 +724,7 @@ def create_job(name, operation, filters, configfile, sweepfile, project=None):
# ------- LOGS


-@cli.group()
+@cli.group("logs")
def logs():
"""logs
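Note: nearly all of the cli.py churn above is one rename pattern. Command functions named help, list, and config shadowed Python builtins (or collided with other names in the module), so the commit moves the user-facing name into the decorator (e.g. @cli.group("list")) and frees the function name (cli_list). A minimal, self-contained sketch of that pattern, assuming only the click package is installed; the command below is illustrative, not the full zpy CLI:

import click


@click.group()
def cli():
    """Top-level CLI group."""
    pass


# Before: def list() shadowed the builtin `list` for the rest of the module.
# After: the shell command is still `list`, but the Python name is cli_list.
@cli.group("list")
def cli_list():
    """List objects."""
    pass


@cli_list.command("datasets")
def list_datasets():
    # The builtin `list` is usable again because nothing shadows it here.
    click.echo(list(range(3)))


if __name__ == "__main__":
    cli()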
32 changes: 23 additions & 9 deletions cli/datasets.py
@@ -23,25 +23,39 @@ def filter_datasets(dfilter, project, url, auth_headers):
auth_headers: authentication for backend
Return:
-dict: filtered datasets by dfilter {'name': 'id'}
+dict: filtered datasets by dfilter
+{
+'uploaded-data-sets': {'id': 'name'},
+'generated-data-sets': {'id': 'name'},
+'job-data-sets': {'id': 'name'},
+}
"""
-filtered_datasets = {}
+filtered_datasets = {key: {} for key in DATASET_TYPES}
field, pattern, regex = parse_filter(dfilter)
for dataset_type in DATASET_TYPES:
endpoint = f"{url}/api/v1/{dataset_type}/"
params = {
**params,
"project": project,
f"{field}__{pattern}": regex,
}

-while endpoint is not None:
-r = requests.get(endpoint, params=params, headers=auth_headers)
+# Do initial request
+r = requests.get(endpoint, params=params, headers=auth_headers)
+if r.status_code != 200:
+r.raise_for_status()
+body = json.loads(r.text)
+for data_set in body["results"]:
+filtered_datasets[dataset_type][data_set["id"]] = data_set["name"]
+
+# Traverse the next links until we've gotten all of the data sets
+while body["next"] is not None:
+r = requests.get(body["next"], headers=auth_headers)
if r.status_code != 200:
r.raise_for_status()
-response = json.loads(r.text)
-for r in response["results"]:
-filtered_datasets[r["name"]] = r["id"]
-endpoint = response["next"]
+body = json.loads(r.text)
+for data_set in body["results"]:
+filtered_datasets[dataset_type][data_set["id"]] = data_set["name"]

return filtered_datasets


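Note: the filter_datasets rewrite above swaps a single `while endpoint` loop for an initial request followed by traversal of the paginated `next` links, bucketing results per dataset type. A standalone sketch of that pagination pattern, assuming a DRF-style listing that returns {"results": [...], "next": <url or null>}; the endpoint and field names here are illustrative, not the real backend contract:

import requests


def fetch_all_pages(endpoint, params, auth_headers):
    """Collect {id: name} across every page of a paginated listing."""
    results = {}

    # Initial request carries the query params.
    r = requests.get(endpoint, params=params, headers=auth_headers)
    r.raise_for_status()
    body = r.json()
    for item in body["results"]:
        results[item["id"]] = item["name"]

    # Each page embeds the absolute URL of the next page (or None at the
    # end), so only the auth headers need to be re-sent.
    while body["next"] is not None:
        r = requests.get(body["next"], headers=auth_headers)
        r.raise_for_status()
        body = r.json()
        for item in body["results"]:
            results[item["id"]] = item["name"]
    return results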
28 changes: 28 additions & 0 deletions cli/utils.py
@@ -1,4 +1,5 @@
import functools
+import math
from copy import deepcopy
from itertools import product
from urllib.request import urlopen
@@ -169,3 +170,30 @@ def wrapper(*args, **kwargs):
return wrapper

return use_project_inner


+def print_list_as_columns(list_of_strings, num_cols=5, indent_prefix=" "):
+"""Format and echo a list of strings into nicely formatted columns.
+Args:
+list_of_strings (list of str): A list of similar strings to format into columns.
+num_cols (int): Desired number of columns.
+indent_prefix (str): String to attach to the beginning of every printed line.
+Returns:
+None
+"""
+count = len(list_of_strings)
+col_width = max(len(string) for string in list_of_strings)
+num_rows = math.ceil(count / num_cols)
+for i in range(num_rows):
+start_index = i * num_cols
+end_index = (i + 1) * num_cols
+if end_index > len(list_of_strings):
+end_index = len(list_of_strings)
+row = list_of_strings[start_index:end_index]
+
+format_string = indent_prefix + " ".join(
+["{{:<{}}}".format(col_width) for _ in row]
+)
+
+click.echo(format_string.format(*row))
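Note: a quick usage sketch for the new helper, assuming cli.utils is importable from the repo root; the dataset names are made up:

from cli.utils import print_list_as_columns

names = [f"dataset-{i:02d}" for i in range(12)]

# ceil(12 / 4) = 3 rows; every cell is left-justified to the width of the
# longest string ("dataset-00" is 10 characters).
print_list_as_columns(names, num_cols=4)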
