From 7d9a3804a397fc6a35ea7cb33e5f957a4400bdd2 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Fri, 6 Oct 2023 10:43:05 -0400 Subject: [PATCH 01/19] Add labels to fake data. Allow to show/hide labels column in settings. --- clockwork_web/core/jobs_helper.py | 1 + clockwork_web/templates/settings.html | 3 ++- test_common/fake_data.json | 34 ++++++++++++++++++++++++++- test_common/fake_data.py | 3 ++- 4 files changed, 38 insertions(+), 3 deletions(-) diff --git a/clockwork_web/core/jobs_helper.py b/clockwork_web/core/jobs_helper.py index a9ef4c72..c896ba0c 100644 --- a/clockwork_web/core/jobs_helper.py +++ b/clockwork_web/core/jobs_helper.py @@ -405,6 +405,7 @@ def get_jobs_properties_list_per_page(): "user", "job_id", "job_array", + "job_labels", "job_name", "job_state", "start_time", diff --git a/clockwork_web/templates/settings.html b/clockwork_web/templates/settings.html index a2d28e3c..8fefe2ed 100644 --- a/clockwork_web/templates/settings.html +++ b/clockwork_web/templates/settings.html @@ -279,6 +279,7 @@

{{ gettext("User settings %(mila_email_username)s", mila_email_username=curr {{ gettext("User (@mila.quebec)") }} {{ gettext("Job ID") }} {{ gettext("Job array") }} + {{ gettext("Job labels") }} {{ gettext("Job name [:20]") }} {{ gettext("Job state") }} {{ gettext("Submit time") }} @@ -291,7 +292,7 @@

{{ gettext("User settings %(mila_email_username)s", mila_email_username=curr {% set page_name = "jobs_list" %} - {% for column_name in ["clusters", "user","job_id", "job_array", "job_name", "job_state", "submit_time", "start_time", "end_time", "links"] %} + {% for column_name in ["clusters", "user","job_id", "job_array", "job_labels", "job_name", "job_state", "submit_time", "start_time", "end_time", "links"] %}
{% if (web_settings | check_web_settings_column_display(page_name, column_name)) %} diff --git a/test_common/fake_data.json b/test_common/fake_data.json index b38bb1a2..c9f97488 100644 --- a/test_common/fake_data.json +++ b/test_common/fake_data.json @@ -5962,5 +5962,37 @@ "tensor_cores": 576, "tflops_fp32": 16.31 } + ], + "labels": [ + { + "name": "je suis un label 1", + "user_id": "student06@mila.quebec", + "job_id": 795002 + }, + { + "name": "je suis un label 2", + "user_id": " student16@mila.quebec", + "job_id": 606872 + }, + { + "name": "je suis un label 3", + "user_id": "student15@mila.quebec", + "job_id": 834395 + }, + { + "name": "je suis un label 4", + "user_id": " student15@mila.quebec", + "job_id": 154325 + }, + { + "name": "je suis un label 5", + "user_id": " student15@mila.quebec", + "job_id": 154325 + }, + { + "name": "je suis un label 1", + "user_id": "student12@mila.quebec", + "job_id": 613024 + } ] -} \ No newline at end of file +} diff --git a/test_common/fake_data.py b/test_common/fake_data.py index 5061ffc0..757da134 100644 --- a/test_common/fake_data.py +++ b/test_common/fake_data.py @@ -70,8 +70,9 @@ def populate_fake_data(db_insertion_point, json_file=None, mutate=False): [("mila_email_username", 1)], name="users_email_index" ) db_insertion_point["gpu"].create_index([("name", 1)], name="gpu_name") + db_insertion_point["labels"].create_index([("user_id", 1), ("job_id", 1)], name="user_id_and_job_id") - for k in ["users", "jobs", "nodes", "gpu"]: + for k in ["users", "jobs", "nodes", "gpu", "labels"]: if k in E: for e in E[k]: db_insertion_point[k].insert_one(e) From 50d8f450b632ce07e914d7f6bdba2db80c776be6 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Fri, 6 Oct 2023 12:29:17 -0400 Subject: [PATCH 02/19] (WIP)(not working) try to join job and label collections to get labels along with jobs. --- clockwork_web/core/jobs_helper.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/clockwork_web/core/jobs_helper.py b/clockwork_web/core/jobs_helper.py index c896ba0c..5f44ee3a 100644 --- a/clockwork_web/core/jobs_helper.py +++ b/clockwork_web/core/jobs_helper.py @@ -115,6 +115,32 @@ def get_filtered_and_paginated_jobs( if not (type(nbr_items_to_display) == int and nbr_items_to_display > 0): nbr_items_to_display = None + aggregation = [{ + "$match": { + "$expr": mongodb_filter + } + }, { + "$lookup": { + "from": "labels", + "localField": "slurm.job_id", + "foreignField": "job_id", + "let": {"labelJobField": "$job_id", "labelUserField": "$user_id"}, + "pipeline": [ + { + "$match": { + "$expr": { + "$and": [ + {"$eq": ["$slurm.job_id", "$$labelJobField"]}, + {"$eq": ["$cw.mila_email_username", "$$labelUserField"]}, + ] + } + } + } + ], + "as": "job_label", + } + }] + # Retrieve the database mc = get_db() # Get the jobs from it @@ -141,7 +167,7 @@ def get_filtered_and_paginated_jobs( sorting.append(["slurm.job_id", 1]) LD_jobs = list( mc["jobs"] - .find(mongodb_filter) + .aggregate(aggregation) .sort(sorting) .skip(nbr_skipped_items) .limit(nbr_items_to_display) @@ -155,7 +181,7 @@ def get_filtered_and_paginated_jobs( # Moreover, in situations where a lot of data was present, # e.g. 1-2 months of historical data, this has caused errors # on the server because not enough memory was allocated to perform the sorting. 
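         # (Sorting in Python instead trades some web-process CPU time for a
         # bounded memory footprint on the MongoDB side.)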
- LD_jobs = list(mc["jobs"].find(mongodb_filter)) + LD_jobs = list(mc["jobs"].aggregate(aggregation)) # Set nbr_total_jobs if want_count: From c93cd52fde482892571898169240550ecce9eef2 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Fri, 6 Oct 2023 13:21:52 -0400 Subject: [PATCH 03/19] Finally match jobs to labels with two separate mongodb calls. Allow to group by job labels on interface. --- clockwork_web/browser_routes/jobs.py | 2 + clockwork_web/core/jobs_helper.py | 65 ++++++++++++++---------- clockwork_web/core/search_helper.py | 3 ++ clockwork_web/templates/base.html | 10 ++++ clockwork_web/templates/jobs_search.html | 17 +++++++ test_common/fake_data.json | 10 ++-- test_common/fake_data.py | 4 +- 7 files changed, 77 insertions(+), 34 deletions(-) diff --git a/clockwork_web/browser_routes/jobs.py b/clockwork_web/browser_routes/jobs.py index 289c751a..b5156f6f 100644 --- a/clockwork_web/browser_routes/jobs.py +++ b/clockwork_web/browser_routes/jobs.py @@ -101,6 +101,7 @@ def route_search(): - "sort_asc" is an optional integer and used to specify if sorting is ascending (1) or descending (-1). Default is 1. - "job_array" is optional and used to specify the job array in which we are looking for jobs + - "job_label" is optional and used to specify the label associated to jobs we are looking for .. :quickref: list all Slurm job as formatted html """ @@ -164,6 +165,7 @@ def route_search(): "sort_by": query.sort_by, "sort_asc": query.sort_asc, "job_array": query.job_array, + "job_label": query.job_label, }, ) diff --git a/clockwork_web/core/jobs_helper.py b/clockwork_web/core/jobs_helper.py index 5f44ee3a..56825fde 100644 --- a/clockwork_web/core/jobs_helper.py +++ b/clockwork_web/core/jobs_helper.py @@ -115,32 +115,6 @@ def get_filtered_and_paginated_jobs( if not (type(nbr_items_to_display) == int and nbr_items_to_display > 0): nbr_items_to_display = None - aggregation = [{ - "$match": { - "$expr": mongodb_filter - } - }, { - "$lookup": { - "from": "labels", - "localField": "slurm.job_id", - "foreignField": "job_id", - "let": {"labelJobField": "$job_id", "labelUserField": "$user_id"}, - "pipeline": [ - { - "$match": { - "$expr": { - "$and": [ - {"$eq": ["$slurm.job_id", "$$labelJobField"]}, - {"$eq": ["$cw.mila_email_username", "$$labelUserField"]}, - ] - } - } - } - ], - "as": "job_label", - } - }] - # Retrieve the database mc = get_db() # Get the jobs from it @@ -167,7 +141,7 @@ def get_filtered_and_paginated_jobs( sorting.append(["slurm.job_id", 1]) LD_jobs = list( mc["jobs"] - .aggregate(aggregation) + .find(mongodb_filter) .sort(sorting) .skip(nbr_skipped_items) .limit(nbr_items_to_display) @@ -181,7 +155,24 @@ def get_filtered_and_paginated_jobs( # Moreover, in situations where a lot of data was present, # e.g. 1-2 months of historical data, this has caused errors # on the server because not enough memory was allocated to perform the sorting. - LD_jobs = list(mc["jobs"].aggregate(aggregation)) + LD_jobs = list(mc["jobs"].find(mongodb_filter)) + + # Get job labels + if LD_jobs: + label_map = {} + # Collect all labels related to found jobs, + # and store them in a dict with keys (user ID, job ID) + for label in mc["labels"].find( + combine_all_mongodb_filters( + {"job_id": {"$in": [int(job["slurm"]["job_id"]) for job in LD_jobs]}} + ) + ): + label_map.setdefault((label["user_id"], label["job_id"]), []).append(label) + # Populate jobs with labels using job's user email and job ID to find related labels in labels dict. 
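+            # (The labels come from a single batched find() on the "labels"
+            # collection: one extra MongoDB call per search rather than one
+            # query per job.)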
+            for job in LD_jobs:
+                job["job_labels"] = label_map.get(
+                    (job["cw"]["mila_email_username"], int(job["slurm"]["job_id"])), []
+                )
 
     # Set nbr_total_jobs
     if want_count:
@@ -261,6 +252,7 @@ def get_jobs(
     sort_by="submit_time",
     sort_asc=-1,
     job_array=None,
+    job_label=None,
 ):
     """
     Set up the filters according to the parameters and retrieve the requested jobs from the database.
@@ -278,6 +270,7 @@
         sort_asc        Whether or not to sort in ascending order (1) or descending order (-1).
         job_array       ID of job array in which we look for jobs.
+        job_label       label (string) we must find in jobs to look for.
 
     Returns:
         A tuple containing:
@@ -285,6 +278,22 @@
         - the total number of jobs corresponding to the filters in the database, if want_count has been set to True, None otherwise, as second element
     """
+    # If a job label is specified,
+    # get job indices from jobs associated with this label.
+    if job_label is not None:
+        mc = get_db()
+        label_job_ids = [
+            str(label["job_id"])
+            for label in mc["labels"].find(
+                combine_all_mongodb_filters({"name": job_label})
+            )
+        ]
+        if job_ids:
+            # If job ids were provided, take the intersection of the given job ids and the labelled job ids.
+            job_ids = list(set(label_job_ids) & set(job_ids))
+        else:
+            # Otherwise, just use the labelled job ids.
+            job_ids = label_job_ids
 
     # Set up and combine filters
     filter = get_global_filter(
diff --git a/clockwork_web/core/search_helper.py b/clockwork_web/core/search_helper.py
index 8d80b33e..f0e55723 100644
--- a/clockwork_web/core/search_helper.py
+++ b/clockwork_web/core/search_helper.py
@@ -21,6 +21,7 @@ def parse_search_request(user, args, force_pagination=True):
         want_count = to_boolean(want_count)
 
     job_array = args.get("job_array", type=int, default=None)
+    job_label = args.get("job_label", type=str, default=None)
 
     default_page_number = "1" if force_pagination else None
 
@@ -71,6 +72,7 @@
         sort_asc=sort_asc,
         want_count=want_count,
         job_array=job_array,
+        job_label=job_label,
     )
 
 
 #########################
@@ -115,5 +117,6 @@
         sort_by=query.sort_by,
         sort_asc=query.sort_asc,
         job_array=query.job_array,
+        job_label=query.job_label,
     )
     return (query, jobs, nbr_total_jobs)
diff --git a/clockwork_web/templates/base.html b/clockwork_web/templates/base.html
index bf79e7ea..adee968c 100644
--- a/clockwork_web/templates/base.html
+++ b/clockwork_web/templates/base.html
@@ -323,6 +323,9 @@

{% if previous_request_args['job_array'] is not none %} + {% endif %} + {% if previous_request_args['job_label'] is not none %} + {% endif %}
@@ -334,6 +337,13 @@

{% endif %} + + {% if previous_request_args['job_label'] is not none %} + + Label "{{ previous_request_args['job_label'] }}"     + + + {% endif %}

diff --git a/clockwork_web/templates/jobs_search.html b/clockwork_web/templates/jobs_search.html index 53077a6e..45be261e 100644 --- a/clockwork_web/templates/jobs_search.html +++ b/clockwork_web/templates/jobs_search.html @@ -101,6 +101,10 @@

JOBS

{% if (web_settings | check_web_settings_column_display(page_name, "job_array")) %} Job array {% endif %} + + {% if (web_settings | check_web_settings_column_display(page_name, "job_labels")) %} + labels + {% endif %} {% if (web_settings | check_web_settings_column_display(page_name, "job_name")) %} {% set sort_by = "name" %} @@ -193,6 +197,19 @@

JOBS

{% endif %} + + {% if (web_settings | check_web_settings_column_display(page_name, "job_labels")) %} + + {% for D_label in D_job['job_labels'] %} +

+ + {{ D_label['name'] }} + +

+ {% endfor %} + + {% endif %} + {% if (web_settings | check_web_settings_column_display(page_name, "job_name")) %} {{D_job['slurm'].get("name", "")[0:20]}} diff --git a/test_common/fake_data.json b/test_common/fake_data.json index c9f97488..30667bf9 100644 --- a/test_common/fake_data.json +++ b/test_common/fake_data.json @@ -5971,7 +5971,7 @@ }, { "name": "je suis un label 2", - "user_id": " student16@mila.quebec", + "user_id": "student16@mila.quebec", "job_id": 606872 }, { @@ -5980,13 +5980,13 @@ "job_id": 834395 }, { - "name": "je suis un label 4", - "user_id": " student15@mila.quebec", + "name": "je suis un label 3", + "user_id": "student15@mila.quebec", "job_id": 154325 }, { - "name": "je suis un label 5", - "user_id": " student15@mila.quebec", + "name": "je suis un label 4", + "user_id": "student15@mila.quebec", "job_id": 154325 }, { diff --git a/test_common/fake_data.py b/test_common/fake_data.py index 757da134..1017c554 100644 --- a/test_common/fake_data.py +++ b/test_common/fake_data.py @@ -70,7 +70,9 @@ def populate_fake_data(db_insertion_point, json_file=None, mutate=False): [("mila_email_username", 1)], name="users_email_index" ) db_insertion_point["gpu"].create_index([("name", 1)], name="gpu_name") - db_insertion_point["labels"].create_index([("user_id", 1), ("job_id", 1)], name="user_id_and_job_id") + db_insertion_point["labels"].create_index( + [("user_id", 1), ("job_id", 1), ("name", 1)], name="job_label_index" + ) for k in ["users", "jobs", "nodes", "gpu", "labels"]: if k in E: From fcc3f6b4bf0804bdf95ef8ef37c61cb7407871d6 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Fri, 6 Oct 2023 14:27:54 -0400 Subject: [PATCH 04/19] Fix unit tests. --- clockwork_web/core/jobs_helper.py | 20 ++++++++++++++------ clockwork_web/templates/jobs_search.html | 2 +- test_common/fake_data.py | 12 ++++++++++++ test_common/jobs_test_helpers.py | 12 ++++-------- 4 files changed, 31 insertions(+), 15 deletions(-) diff --git a/clockwork_web/core/jobs_helper.py b/clockwork_web/core/jobs_helper.py index 56825fde..96b6819b 100644 --- a/clockwork_web/core/jobs_helper.py +++ b/clockwork_web/core/jobs_helper.py @@ -162,17 +162,25 @@ def get_filtered_and_paginated_jobs( label_map = {} # Collect all labels related to found jobs, # and store them in a dict with keys (user ID, job ID) - for label in mc["labels"].find( - combine_all_mongodb_filters( - {"job_id": {"$in": [int(job["slurm"]["job_id"]) for job in LD_jobs]}} + for label in list( + mc["labels"].find( + combine_all_mongodb_filters( + { + "job_id": { + "$in": [int(job["slurm"]["job_id"]) for job in LD_jobs] + } + } + ) ) ): + # Remove MongoDB identifier, as we won't use it. + label.pop("_id") label_map.setdefault((label["user_id"], label["job_id"]), []).append(label) # Populate jobs with labels using job's user email and job ID to find related labels in labels dict. for job in LD_jobs: - job["job_labels"] = label_map.get( - (job["cw"]["mila_email_username"], int(job["slurm"]["job_id"])), [] - ) + key = (job["cw"]["mila_email_username"], int(job["slurm"]["job_id"])) + if key in label_map: + job["job_labels"] = label_map[key] # Set nbr_total_jobs if want_count: diff --git a/clockwork_web/templates/jobs_search.html b/clockwork_web/templates/jobs_search.html index 45be261e..fdc364a1 100644 --- a/clockwork_web/templates/jobs_search.html +++ b/clockwork_web/templates/jobs_search.html @@ -200,7 +200,7 @@

JOBS

{% if (web_settings | check_web_settings_column_display(page_name, "job_labels")) %} - {% for D_label in D_job['job_labels'] %} + {% for D_label in D_job.get('job_labels', []) %}

{{ D_label['name'] }} diff --git a/test_common/fake_data.py b/test_common/fake_data.py index 1017c554..cf3cf360 100644 --- a/test_common/fake_data.py +++ b/test_common/fake_data.py @@ -20,6 +20,15 @@ def fake_data(): ) with open(json_file, "r") as f: E = json.load(f) + + # Add labels to jobs + for job in E["jobs"]: + job_id = int(job["slurm"]["job_id"]) + user_id = job["cw"]["mila_email_username"] + for label in E["labels"]: + if label["job_id"] == job_id and label["user_id"] == user_id: + job.setdefault("job_labels", []).append(label) + mutate_some_job_status(E) return E @@ -99,6 +108,9 @@ def cleanup_function(): for e in E["gpu"]: db_insertion_point["gpu"].delete_many({"name": e["name"]}) + for e in E["labels"]: + db_insertion_point["labels"].delete_many({"name": e["name"]}) + for (k, sub, id_field) in [ ("jobs", "slurm", "job_id"), ("nodes", "slurm", "name"), diff --git a/test_common/jobs_test_helpers.py b/test_common/jobs_test_helpers.py index 5804a2ec..b66ac7af 100644 --- a/test_common/jobs_test_helpers.py +++ b/test_common/jobs_test_helpers.py @@ -34,11 +34,8 @@ def helper_single_job_at_random(fake_data, cluster_name): def validator(D_job): for k1 in original_D_job: - assert k1 in ["slurm", "cw", "user"] - for k2 in original_D_job[k1]: - assert ( - D_job[k1][k2] == original_D_job[k1][k2] - ), f"{D_job}\n{original_D_job}" + assert k1 in ["slurm", "cw", "user", "job_labels"] + assert D_job[k1] == original_D_job[k1], f"{D_job}\n{original_D_job}" return validator, job_id @@ -167,8 +164,7 @@ def validator(LD_jobs): # compare all the dicts one by one for (D_job, D_original_job) in zip(LD_jobs, LD_original_jobs): for k1 in D_original_job: - assert k1 in ["slurm", "cw", "user"] - for k2 in D_original_job[k1]: - assert D_job[k1][k2] == D_original_job[k1][k2] + assert k1 in ["slurm", "cw", "user", "job_labels"] + assert D_job[k1] == D_original_job[k1] return validator From 0221910d10061ae10c3b24bc55d749d4fe70740d Mon Sep 17 00:00:00 2001 From: notoraptor Date: Sun, 12 Nov 2023 18:10:29 -0500 Subject: [PATCH 05/19] Make sure to set job label to None when deselected --- clockwork_web/core/search_helper.py | 2 +- clockwork_web/templates/base.html | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clockwork_web/core/search_helper.py b/clockwork_web/core/search_helper.py index f0e55723..e46c2072 100644 --- a/clockwork_web/core/search_helper.py +++ b/clockwork_web/core/search_helper.py @@ -21,7 +21,7 @@ def parse_search_request(user, args, force_pagination=True): want_count = to_boolean(want_count) job_array = args.get("job_array", type=int, default=None) - job_label = args.get("job_label", type=str, default=None) + job_label = args.get("job_label", type=str, default=None) or None default_page_number = "1" if force_pagination else None diff --git a/clockwork_web/templates/base.html b/clockwork_web/templates/base.html index adee968c..57052268 100644 --- a/clockwork_web/templates/base.html +++ b/clockwork_web/templates/base.html @@ -24,7 +24,7 @@ - + @@ -339,7 +339,7 @@

+ Label "{{ previous_request_args['job_label'] }}"     From 391f004f2bbc85fcf644a8778d3ada0293ec6325 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Fri, 16 Feb 2024 09:41:11 -0500 Subject: [PATCH 06/19] Allow to display job-user dicts --- clockwork_web/browser_routes/jobs.py | 6 ++-- clockwork_web/core/jobs_helper.py | 21 +++++++----- clockwork_web/core/search_helper.py | 9 ++++-- clockwork_web/templates/base.html | 13 +++++--- clockwork_web/templates/jobs_search.html | 7 ++-- test_common/fake_data.json | 41 +++++++++++++++--------- test_common/fake_data.py | 11 ++++--- 7 files changed, 68 insertions(+), 40 deletions(-) diff --git a/clockwork_web/browser_routes/jobs.py b/clockwork_web/browser_routes/jobs.py index b5156f6f..b3dffbfb 100644 --- a/clockwork_web/browser_routes/jobs.py +++ b/clockwork_web/browser_routes/jobs.py @@ -101,7 +101,8 @@ def route_search(): - "sort_asc" is an optional integer and used to specify if sorting is ascending (1) or descending (-1). Default is 1. - "job_array" is optional and used to specify the job array in which we are looking for jobs - - "job_label" is optional and used to specify the label associated to jobs we are looking for + - "job_label_name" is optional and used to specify the label name associated to jobs we are looking for + - "job_label_content" is optional and used to specify the label value associated to jobs we are looking for .. :quickref: list all Slurm job as formatted html """ @@ -165,7 +166,8 @@ def route_search(): "sort_by": query.sort_by, "sort_asc": query.sort_asc, "job_array": query.job_array, - "job_label": query.job_label, + "job_label_name": query.job_label_name, + "job_label_content": query.job_label_content, }, ) diff --git a/clockwork_web/core/jobs_helper.py b/clockwork_web/core/jobs_helper.py index 96b6819b..fd9e58cd 100644 --- a/clockwork_web/core/jobs_helper.py +++ b/clockwork_web/core/jobs_helper.py @@ -161,7 +161,7 @@ def get_filtered_and_paginated_jobs( if LD_jobs: label_map = {} # Collect all labels related to found jobs, - # and store them in a dict with keys (user ID, job ID) + # and store them in a dict with keys (user ID, job ID, cluster_name) for label in list( mc["labels"].find( combine_all_mongodb_filters( @@ -175,10 +175,13 @@ def get_filtered_and_paginated_jobs( ): # Remove MongoDB identifier, as we won't use it. label.pop("_id") - label_map.setdefault((label["user_id"], label["job_id"]), []).append(label) - # Populate jobs with labels using job's user email and job ID to find related labels in labels dict. + key = (label["user_id"], label["job_id"], label["cluster_name"]) + assert key not in label_map + label_map[key] = label["labels"] + # Populate jobs with labels using job's user email, job ID and cluster name + # to find related labels in labels dict. for job in LD_jobs: - key = (job["cw"]["mila_email_username"], int(job["slurm"]["job_id"])) + key = (job["cw"]["mila_email_username"], int(job["slurm"]["job_id"]), job["slurm"]["cluster_name"]) if key in label_map: job["job_labels"] = label_map[key] @@ -260,7 +263,8 @@ def get_jobs( sort_by="submit_time", sort_asc=-1, job_array=None, - job_label=None, + job_label_name=None, + job_label_content=None, ): """ Set up the filters according to the parameters and retrieve the requested jobs from the database. @@ -278,7 +282,8 @@ def get_jobs( sort_asc Whether or not to sort in ascending order (1) or descending order (-1). job_array ID of job array in which we look for jobs. - job_label label (string) we must find in jobs to look for. 
+        job_label_name     name (string) of the label that the matching jobs must carry.
+        job_label_content  content (string) of the label that the matching jobs must carry.
 
     Returns:
         A tuple containing:
@@ -288,12 +293,12 @@ def get_jobs(
     """
     # If a job label is specified,
     # get job indices from jobs associated with this label.
-    if job_label is not None:
+    if job_label_name is not None and job_label_content is not None:
         mc = get_db()
         label_job_ids = [
             str(label["job_id"])
             for label in mc["labels"].find(
-                combine_all_mongodb_filters({"name": job_label})
+                combine_all_mongodb_filters(
+                    {f"labels.{job_label_name}": job_label_content}
+                )
             )
         ]
         if job_ids:
diff --git a/clockwork_web/core/search_helper.py b/clockwork_web/core/search_helper.py
index e46c2072..54e6a75c 100644
--- a/clockwork_web/core/search_helper.py
+++ b/clockwork_web/core/search_helper.py
@@ -21,7 +21,8 @@ def parse_search_request(user, args, force_pagination=True):
         want_count = to_boolean(want_count)
 
     job_array = args.get("job_array", type=int, default=None)
-    job_label = args.get("job_label", type=str, default=None) or None
+    job_label_name = args.get("job_label_name", type=str, default=None) or None
+    job_label_content = args.get("job_label_content", type=str, default=None) or None
 
     default_page_number = "1" if force_pagination else None
 
@@ -73,7 +74,8 @@
         sort_asc=sort_asc,
         want_count=want_count,
         job_array=job_array,
-        job_label=job_label,
+        job_label_name=job_label_name,
+        job_label_content=job_label_content,
     )
 
 
 #########################
@@ -119,6 +121,7 @@
         sort_by=query.sort_by,
         sort_asc=query.sort_asc,
         job_array=query.job_array,
-        job_label=query.job_label,
+        job_label_name=query.job_label_name,
+        job_label_content=query.job_label_content,
     )
     return (query, jobs, nbr_total_jobs)
diff --git a/clockwork_web/templates/base.html b/clockwork_web/templates/base.html
index 57052268..51d8c7d8 100644
--- a/clockwork_web/templates/base.html
+++ b/clockwork_web/templates/base.html
@@ -324,8 +324,11 @@

{% endif %} - {% if previous_request_args['job_label'] is not none %} - + {% if previous_request_args['job_label_name'] is not none %} + + {% endif %} + {% if previous_request_args['job_label_content'] is not none %} + {% endif %}
@@ -338,9 +341,9 @@

- Label "{{ previous_request_args['job_label'] }}"     + {% if previous_request_args['job_label_name'] is not none and previous_request_args['job_label_content'] is not none %} + + Label {{ previous_request_args['job_label_name'] }}: "{{ previous_request_args['job_label_content'] }}"     {% endif %} diff --git a/clockwork_web/templates/jobs_search.html b/clockwork_web/templates/jobs_search.html index fdc364a1..2bd20321 100644 --- a/clockwork_web/templates/jobs_search.html +++ b/clockwork_web/templates/jobs_search.html @@ -200,10 +200,11 @@

JOBS

{% if (web_settings | check_web_settings_column_display(page_name, "job_labels")) %} - {% for D_label in D_job.get('job_labels', []) %} + {% for D_label_name, D_label_content in D_job.get('job_labels', {}).items() %}

- - {{ D_label['name'] }} + + {{ D_label_name }}
+ {{ D_label_content }}

{% endfor %} diff --git a/test_common/fake_data.json b/test_common/fake_data.json index 30667bf9..0ffcbee2 100644 --- a/test_common/fake_data.json +++ b/test_common/fake_data.json @@ -5965,34 +5965,45 @@ ], "labels": [ { - "name": "je suis un label 1", "user_id": "student06@mila.quebec", - "job_id": 795002 + "job_id": 795002, + "cluster_name": "mila", + "labels": { + "name": "je suis un label 1" + } }, { - "name": "je suis un label 2", "user_id": "student16@mila.quebec", - "job_id": 606872 - }, - { - "name": "je suis un label 3", - "user_id": "student15@mila.quebec", - "job_id": 834395 + "job_id": 606872, + "cluster_name": "mila", + "labels": { + "name": "je suis un label 2" + } }, { - "name": "je suis un label 3", "user_id": "student15@mila.quebec", - "job_id": 154325 + "job_id": 834395, + "cluster_name": "graham", + "labels": { + "name": "je suis un label 3" + } }, { - "name": "je suis un label 4", "user_id": "student15@mila.quebec", - "job_id": 154325 + "job_id": 154325, + "cluster_name": "graham", + "labels": { + "name": "je suis un label 3", + "name2": "je suis un label 4" + } }, { - "name": "je suis un label 1", "user_id": "student12@mila.quebec", - "job_id": 613024 + "job_id": 613024, + "cluster_name": "graham", + "labels": { + "name": "je suis un label 1" + } } ] } diff --git a/test_common/fake_data.py b/test_common/fake_data.py index cf3cf360..bf14e314 100644 --- a/test_common/fake_data.py +++ b/test_common/fake_data.py @@ -25,9 +25,10 @@ def fake_data(): for job in E["jobs"]: job_id = int(job["slurm"]["job_id"]) user_id = job["cw"]["mila_email_username"] + cluster_name = job["slurm"]["cluster_name"] for label in E["labels"]: - if label["job_id"] == job_id and label["user_id"] == user_id: - job.setdefault("job_labels", []).append(label) + if label["job_id"] == job_id and label["user_id"] == user_id and label["cluster_name"] == cluster_name: + job["job_labels"] = label["labels"] mutate_some_job_status(E) return E @@ -80,7 +81,7 @@ def populate_fake_data(db_insertion_point, json_file=None, mutate=False): ) db_insertion_point["gpu"].create_index([("name", 1)], name="gpu_name") db_insertion_point["labels"].create_index( - [("user_id", 1), ("job_id", 1), ("name", 1)], name="job_label_index" + [("user_id", 1), ("job_id", 1), ("cluster_name", 1), ("labels", 1)], name="job_label_index" ) for k in ["users", "jobs", "nodes", "gpu", "labels"]: @@ -109,7 +110,9 @@ def cleanup_function(): db_insertion_point["gpu"].delete_many({"name": e["name"]}) for e in E["labels"]: - db_insertion_point["labels"].delete_many({"name": e["name"]}) + copy_e = e + copy_e.pop("labels") + db_insertion_point["labels"].delete_many(copy_e) for (k, sub, id_field) in [ ("jobs", "slurm", "job_id"), From eb751d5e42df65ff0ba598899b17b850f14899e3 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Fri, 16 Feb 2024 10:27:00 -0500 Subject: [PATCH 07/19] Format code. --- clockwork_web/core/jobs_helper.py | 10 ++++++++-- test_common/fake_data.py | 9 +++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/clockwork_web/core/jobs_helper.py b/clockwork_web/core/jobs_helper.py index fd9e58cd..f06e3f74 100644 --- a/clockwork_web/core/jobs_helper.py +++ b/clockwork_web/core/jobs_helper.py @@ -181,7 +181,11 @@ def get_filtered_and_paginated_jobs( # Populate jobs with labels using job's user email, job ID and cluster name # to find related labels in labels dict. 
for job in LD_jobs: - key = (job["cw"]["mila_email_username"], int(job["slurm"]["job_id"]), job["slurm"]["cluster_name"]) + key = ( + job["cw"]["mila_email_username"], + int(job["slurm"]["job_id"]), + job["slurm"]["cluster_name"], + ) if key in label_map: job["job_labels"] = label_map[key] @@ -298,7 +302,9 @@ def get_jobs( label_job_ids = [ str(label["job_id"]) for label in mc["labels"].find( - combine_all_mongodb_filters({f"labels.{job_label_name}": job_label_content}) + combine_all_mongodb_filters( + {f"labels.{job_label_name}": job_label_content} + ) ) ] if job_ids: diff --git a/test_common/fake_data.py b/test_common/fake_data.py index bf14e314..0677e267 100644 --- a/test_common/fake_data.py +++ b/test_common/fake_data.py @@ -27,7 +27,11 @@ def fake_data(): user_id = job["cw"]["mila_email_username"] cluster_name = job["slurm"]["cluster_name"] for label in E["labels"]: - if label["job_id"] == job_id and label["user_id"] == user_id and label["cluster_name"] == cluster_name: + if ( + label["job_id"] == job_id + and label["user_id"] == user_id + and label["cluster_name"] == cluster_name + ): job["job_labels"] = label["labels"] mutate_some_job_status(E) @@ -81,7 +85,8 @@ def populate_fake_data(db_insertion_point, json_file=None, mutate=False): ) db_insertion_point["gpu"].create_index([("name", 1)], name="gpu_name") db_insertion_point["labels"].create_index( - [("user_id", 1), ("job_id", 1), ("cluster_name", 1), ("labels", 1)], name="job_label_index" + [("user_id", 1), ("job_id", 1), ("cluster_name", 1), ("labels", 1)], + name="job_label_index", ) for k in ["users", "jobs", "nodes", "gpu", "labels"]: From c4d94eada256b71b21a37bedb226161400094ff1 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Mon, 19 Feb 2024 19:27:08 -0500 Subject: [PATCH 08/19] Improve get_jobs(): filter labels using current user in get_filtered_and_paginated_jobs() --- clockwork_web/core/jobs_helper.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/clockwork_web/core/jobs_helper.py b/clockwork_web/core/jobs_helper.py index f06e3f74..30f82354 100644 --- a/clockwork_web/core/jobs_helper.py +++ b/clockwork_web/core/jobs_helper.py @@ -7,6 +7,7 @@ import time from flask.globals import current_app +from flask_login import current_user from ..db import get_db @@ -168,7 +169,8 @@ def get_filtered_and_paginated_jobs( { "job_id": { "$in": [int(job["slurm"]["job_id"]) for job in LD_jobs] - } + }, + "user_id": current_user.mila_email_username, } ) ) @@ -178,16 +180,19 @@ def get_filtered_and_paginated_jobs( key = (label["user_id"], label["job_id"], label["cluster_name"]) assert key not in label_map label_map[key] = label["labels"] - # Populate jobs with labels using job's user email, job ID and cluster name - # to find related labels in labels dict. - for job in LD_jobs: - key = ( - job["cw"]["mila_email_username"], - int(job["slurm"]["job_id"]), - job["slurm"]["cluster_name"], - ) - if key in label_map: - job["job_labels"] = label_map[key] + + if label_map: + # Populate jobs with labels using job's user email, job ID and cluster name + # to find related labels in labels dict. 
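+            # (Only labels owned by the authenticated user were fetched above,
+            # so jobs belonging to other users simply get no "job_labels" field.)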
+ for job in LD_jobs: + key = ( + # job["cw"]["mila_email_username"], + current_user.mila_email_username, + int(job["slurm"]["job_id"]), + job["slurm"]["cluster_name"], + ) + if key in label_map: + job["job_labels"] = label_map[key] # Set nbr_total_jobs if want_count: From 105ecbd183b193718522cc82829a8d77c1d7531c Mon Sep 17 00:00:00 2001 From: notoraptor Date: Tue, 20 Feb 2024 07:27:31 -0500 Subject: [PATCH 09/19] Add a specific improved function _jobs_are_old(cluster_name) to tell if cluster jobs are old. Much faster to use than get_jobs(). --- clockwork_web/core/users_helper.py | 43 ++++++++++++++++-------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/clockwork_web/core/users_helper.py b/clockwork_web/core/users_helper.py index 86862e64..30ce7265 100644 --- a/clockwork_web/core/users_helper.py +++ b/clockwork_web/core/users_helper.py @@ -592,19 +592,30 @@ def render_template_with_user_settings(template_name_or_list, **context): # Get cluster status (if jobs are old and cluster has error). for cluster_name in context["clusters"]: - # Default status values. - jobs_are_old = False + # Cluster error cannot yet be checked, so + # cluster_has_error is always False for now. cluster_has_error = False + context["clusters"][cluster_name]["status"] = { + "jobs_are_old": _jobs_are_old(cluster_name), + "cluster_has_error": cluster_has_error, + } - # Check if jobs are old. - jobs, _ = get_jobs(cluster_names=[cluster_name]) - job_dates = [ - job["cw"]["last_slurm_update"] - for job in jobs - if "last_slurm_update" in job["cw"] - ] - if job_dates: - most_recent_job_edition = max(job_dates) + return render_template(template_name_or_list, **context) + + +def _jobs_are_old(cluster_name): + jobs_are_old = False + + mongodb_filter = {"slurm.cluster_name": cluster_name} + mc = get_db() + job_with_max_cw_last_slurm_update = list( + mc["jobs"].find(mongodb_filter).sort([("cw.last_slurm_update", -1)]).limit(1) + ) + + if job_with_max_cw_last_slurm_update: + (job,) = job_with_max_cw_last_slurm_update + if "last_slurm_update" in job["cw"]: + most_recent_job_edition = job["cw"]["last_slurm_update"] current_timestamp = datetime.now().timestamp() elapsed_time = timedelta( seconds=current_timestamp - most_recent_job_edition @@ -613,12 +624,4 @@ def render_template_with_user_settings(template_name_or_list, **context): max_delay = timedelta(days=30) jobs_are_old = elapsed_time > max_delay - # Cluster error cannot yet be checked, so - # cluster_has_error is always False for now. - - context["clusters"][cluster_name]["status"] = { - "jobs_are_old": jobs_are_old, - "cluster_has_error": cluster_has_error, - } - - return render_template(template_name_or_list, **context) + return jobs_are_old From d3b574e98938f9c2363f635449c88e763387ab96 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Tue, 20 Feb 2024 07:28:21 -0500 Subject: [PATCH 10/19] Add a specific script to populate database with huge fake data, adding about 1 000 000 jobs and job-user dicts. 
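
Sizing note, derived from the script below: each of the 20 fake users i
gets 2**i jobs, so the largest run inserts sum(2**i for i in range(20))
= 2**20 - 1 = 1048575 jobs, which is the "about 1 000 000" above, plus
optionally one job-user dict per job.

    >>> sum(2 ** i for i in range(20))
    1048575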
---
 scripts/store_huge_fake_data_in_db.py | 509 ++++++++++++++++++++++++++
 1 file changed, 509 insertions(+)
 create mode 100644 scripts/store_huge_fake_data_in_db.py

diff --git a/scripts/store_huge_fake_data_in_db.py b/scripts/store_huge_fake_data_in_db.py
new file mode 100644
index 00000000..c9471eae
--- /dev/null
+++ b/scripts/store_huge_fake_data_in_db.py
@@ -0,0 +1,509 @@
+"""
+Request-time variation as a function of the number of job-user dictionaries.
+For a fixed number of jobs = n:
+    0 to n dicts with 1 prop each
+    --nb-dicts
+    1 to k props for each of the n dicts
+    --nb-props
+Request-time variation as a function of the number of jobs in the DB:
+    With 0 dicts: 0 to n jobs
+    --nb-jobs
+    With n dicts of k props each: 0 to n jobs
+    --nb-jobs
+
+n = 19
+--nb-jobs: 0 to n => 2 ** 0 to 2 ** n
+--nb-dicts: 0 to n => 2 ** 0 to 2 ** n
+--nb-props: 1 to k
+
+Parameters:
+--nb-jobs --nb-dicts --nb-props-per-dict
+n 0 1
+n ... 1
+n n 1
+n n ...
+n n k
+
+0 0 1
+... 0 1
+n 0 1
+------------VS-----------
+0 n k
+... n k
+n n k
+"""
+
+import argparse
+import sys
+from datetime import datetime
+
+from clockwork_web.config import register_config
+from slurm_state.mongo_client import get_mongo_client
+from slurm_state.config import get_config
+
+USERS = [
+    {
+        "mila_email_username": "student00@mila.quebec",
+        "status": "enabled",
+        "clockwork_api_key": "000aaa00",
+        "mila_cluster_username": "milauser00",
+        "cc_account_username": "ccuser00",
+        "cc_account_update_key": None,
+        "web_settings": {
+            "nbr_items_per_page": 40,
+            "dark_mode": False,
+            "language": "en",
+        },
+    },
+    {
+        "mila_email_username": "student01@mila.quebec",
+        "status": "enabled",
+        "clockwork_api_key": "000aaa01",
+        "mila_cluster_username": "milauser01",
+        "cc_account_username": "ccuser01",
+        "cc_account_update_key": None,
+        "web_settings": {
+            "nbr_items_per_page": 40,
+            "dark_mode": False,
+            "language": "fr",
+        },
+    },
+    {
+        "mila_email_username": "student02@mila.quebec",
+        "status": "enabled",
+        "clockwork_api_key": "000aaa02",
+        "mila_cluster_username": "milauser02",
+        "cc_account_username": "ccuser02",
+        "cc_account_update_key": None,
+        "web_settings": {
+            "nbr_items_per_page": 40,
+            "dark_mode": False,
+            "language": "en",
+        },
+    },
+    {
+        "mila_email_username": "student03@mila.quebec",
+        "status": "enabled",
+        "clockwork_api_key": "000aaa03",
+        "mila_cluster_username": "milauser03",
+        "cc_account_username": "ccuser03",
+        "cc_account_update_key": None,
+        "web_settings": {
+            "nbr_items_per_page": 40,
+            "dark_mode": False,
+            "language": "fr",
+        },
+    },
+    {
+        "mila_email_username": "student04@mila.quebec",
+        "status": "enabled",
+        "clockwork_api_key": "000aaa04",
+        "mila_cluster_username": "milauser04",
+        "cc_account_username": "ccuser04",
+        "cc_account_update_key": None,
+        "web_settings": {
+            "nbr_items_per_page": 40,
+            "dark_mode": False,
+            "language": "en",
+        },
+    },
+    {
+        "mila_email_username": "student05@mila.quebec",
+        "status": "enabled",
+        "clockwork_api_key": "000aaa05",
+        "mila_cluster_username": "milauser05",
+        "cc_account_username": "ccuser05",
+        "cc_account_update_key": None,
+        "web_settings": {
+            "nbr_items_per_page": 40,
+            "dark_mode": False,
+            "language": "fr",
+        },
+    },
+    {
+        "mila_email_username": "student06@mila.quebec",
+        "status": "enabled",
+        "clockwork_api_key": "000aaa06",
+        "mila_cluster_username": "milauser06",
+        "cc_account_username": None,
+        "cc_account_update_key": None,
+        "web_settings": {
+            "nbr_items_per_page": 40,
+            "dark_mode": False,
+            "language": "en",
+        },
+    },
+    {
+        "mila_email_username": 
"student07@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa07", + "mila_cluster_username": "milauser07", + "cc_account_username": "ccuser07", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, + { + "mila_email_username": "student08@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa08", + "mila_cluster_username": "milauser08", + "cc_account_username": "ccuser08", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "en", + }, + }, + { + "mila_email_username": "student09@mila.quebec", + "status": "disabled", + "clockwork_api_key": "000aaa09", + "mila_cluster_username": "milauser09", + "cc_account_username": "ccuser09", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, + { + "mila_email_username": "student10@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa10", + "mila_cluster_username": "milauser10", + "cc_account_username": "ccuser10", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "en", + }, + }, + { + "mila_email_username": "student11@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa11", + "mila_cluster_username": "milauser11", + "cc_account_username": "ccuser11", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, + { + "mila_email_username": "student12@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa12", + "mila_cluster_username": "milauser12", + "cc_account_username": "ccuser12", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "en", + }, + }, + { + "mila_email_username": "student13@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa13", + "mila_cluster_username": "milauser13", + "cc_account_username": "ccuser13", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, + { + "mila_email_username": "student14@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa14", + "mila_cluster_username": "milauser14", + "cc_account_username": "ccuser14", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "en", + }, + }, + { + "mila_email_username": "student15@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa15", + "mila_cluster_username": "milauser15", + "cc_account_username": "ccuser15", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, + { + "mila_email_username": "student16@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa16", + "mila_cluster_username": "milauser16", + "cc_account_username": "ccuser16", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "en", + }, + }, + { + "mila_email_username": "student17@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa17", + "mila_cluster_username": "milauser17", + "cc_account_username": "ccuser17", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, + { + "mila_email_username": 
"student18@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa18", + "mila_cluster_username": "milauser18", + "cc_account_username": "ccuser18", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "en", + }, + }, + { + "mila_email_username": "student19@mila.quebec", + "status": "disabled", + "clockwork_api_key": "000aaa19", + "mila_cluster_username": "milauser19", + "cc_account_username": "ccuser19", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, +] +BASE_JOB_SLURM = { + "account": "def-patate-rrg", + "cluster_name": "beluga", + "time_limit": 4320, + "submit_time": 1681680327, + "start_time": 0, + "end_time": 0, + "exit_code": "SUCCESS:0", + "array_job_id": "0", + "array_task_id": "None", + "job_id": "197775", + "name": "somejobname_507716", + "nodes": "None assigned", + "partition": "other_fun_partition", + "job_state": "PENDING", + "tres_allocated": {}, + "tres_requested": { + "num_cpus": 80, + "mem": 95000, + "num_nodes": 1, + "billing": 80, + }, + "username": "ccuser02", + "working_directory": "/a809/b333/c569", +} +BASE_JOB_CW = { + "mila_email_username": "student02@mila.quebec", + "last_slurm_update": 1686248596.476063, + "last_slurm_update_by_sacct": 1686248596.476063, +} +BASE_USER_JOB_DICT = { + "user_id": "student00@mila.quebec", + "job_id": 795002, + "cluster_name": "beluga", + "labels": {f"name_{i + 1}": f"i am a label {i + 1}" for i in range(4)}, +} + + +def _generate_huge_fake_data(with_labels=False): + nb_jobs_per_user = [2**i for i in range(len(USERS))] + assert len(nb_jobs_per_user) == len(USERS) + nb_user_job_dicts = sum(nb_jobs_per_user) + jobs = [] + labels = [] + + # populate jobs + job_id = 0 + for user, nb_user_jobs in zip(USERS, nb_jobs_per_user): + for i in range(nb_user_jobs): + job_id += 1 + job_slurm = BASE_JOB_SLURM.copy() + job_cw = BASE_JOB_CW.copy() + # edit slurm.job_id + job_slurm["job_id"] = str(job_id) + # edit slurm.name + job_slurm["name"] = f"job_name_{job_id}" + # edit slurm.username + job_slurm["username"] = user["cc_account_username"] + # edit cw.mila_email_username + job_cw["mila_email_username"] = user["mila_email_username"] + jobs.append({"slurm": job_slurm, "cw": job_cw, "user": {}}) + print("Nb. jobs:", job_id) + assert job_id == nb_user_job_dicts + + if with_labels: + # populate labels + for i in range(nb_user_job_dicts): + user_job_dict = BASE_USER_JOB_DICT.copy() + # edit job_id + user_job_dict["job_id"] = i + 1 + labels.append(user_job_dict) + + return {"users": USERS, "jobs": jobs, "labels": labels} + + +def populate_fake_data(db_insertion_point, labels=False): + print("Generating huge fake data") + E = _generate_huge_fake_data(with_labels=labels) + print("Generated huge fake data") + + # Create indices. This isn't half as important as when we're + # dealing with large quantities of data, but it's part of the + # set up for the database. 
+ db_insertion_point["jobs"].create_index( + [("slurm.job_id", 1), ("slurm.cluster_name", 1)], + name="job_id_and_cluster_name", + ) + db_insertion_point["nodes"].create_index( + [("slurm.name", 1), ("slurm.cluster_name", 1)], + name="name_and_cluster_name", + ) + db_insertion_point["users"].create_index( + [("mila_email_username", 1)], name="users_email_index" + ) + db_insertion_point["gpu"].create_index([("name", 1)], name="gpu_name") + db_insertion_point["labels"].create_index( + [("user_id", 1), ("job_id", 1), ("cluster_name", 1), ("labels", 1)], + name="job_label_index", + ) + + for k in ["users", "jobs", "nodes", "gpu", "labels"]: + if k in E and E[k]: + print("Inserting", k) + db_insertion_point[k].insert_many(E[k]) + print("Inserted", k) + + def cleanup_function(): + """ + Each of those kinds of data is identified in a unique way, + and we can use that identifier to clean up. + + For example, when clearing out jobs, we can look at the "job_id" + of the entries that we inserted. + + The point is that we can run a test against the production mongodb on Atlas + and not affect the real data. If we cleared the tables completely, + then we'd be affecting the real data in a bad way. + """ + for e in E["users"]: + db_insertion_point["users"].delete_many( + {"mila_email_username": e["mila_email_username"]} + ) + + for e in E["gpu"]: + db_insertion_point["gpu"].delete_many({"name": e["name"]}) + + for e in E["labels"]: + copy_e = e + copy_e.pop("labels") + db_insertion_point["labels"].delete_many(copy_e) + + for (k, sub, id_field) in [ + ("jobs", "slurm", "job_id"), + ("nodes", "slurm", "name"), + ]: + if k in E: + for e in E[k]: + # This is complicated, but it's just about a way to say something like + # that we want to remove {"slurm.job_id", e["slurm"]["job_id"]}, + # and the weird notation comes from the fact that mongodb filters use dots, + # but not the original python. + db_insertion_point[k].delete_many( + {f"{sub}.{id_field}": e[sub][id_field]} + ) + + return cleanup_function + + +def store_data_in_db(labels=False): + # Open the database and insert the contents. + client = get_mongo_client() + populate_fake_data(client[get_config("mongo.database_name")], labels=labels) + + +def modify_timestamps(data): + """ + This function updates the timestamps in order to simulate jobs which have + been launched more recently than they were. + """ + # Retrieve the most recent timestamp (ie its end_time) + most_recent_timestamp = data["jobs"][0]["slurm"]["end_time"] + # most_recent_timestamp = min(job["slurm"]["end_time"] for job in data["jobs"]) + for job in data["jobs"]: + new_end_time = job["slurm"]["end_time"] + if new_end_time: + if new_end_time > most_recent_timestamp: + most_recent_timestamp = new_end_time + + # Retrieve the time interval between this timestamp and now + time_delta = datetime.now().timestamp() - most_recent_timestamp + + # Substract it to the timestamps of the jobs + for job in data["jobs"]: + if job["slurm"]["submit_time"]: + job["slurm"]["submit_time"] += time_delta + if job["slurm"]["start_time"]: + job["slurm"]["start_time"] += time_delta + if job["slurm"]["end_time"]: + job["slurm"]["end_time"] += time_delta + + +def main(argv): + # Retrieve the arguments passed to the script + parser = argparse.ArgumentParser() + parser.add_argument( + "--labels", + type=bool, + action=argparse.BooleanOptionalAction, + default=False, + help="Add fake job-user dicts. 
If False (default), only add jobs.", + ) + args = parser.parse_args(argv[1:]) + print(args) + + # Register the elements to access the database + register_config("mongo.connection_string", "") + register_config("mongo.database_name", "clockwork") + + # Store the generated fake data in the database + store_data_in_db(labels=args.labels) + + +if __name__ == "__main__": + main(sys.argv) From 4f6459dc09e07e6e8027fbf6ee8b70dd35834d14 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Tue, 20 Feb 2024 07:36:58 -0500 Subject: [PATCH 11/19] Make job-user dicts more specific to each job. --- scripts/store_huge_fake_data_in_db.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/scripts/store_huge_fake_data_in_db.py b/scripts/store_huge_fake_data_in_db.py index c9471eae..2b901a65 100644 --- a/scripts/store_huge_fake_data_in_db.py +++ b/scripts/store_huge_fake_data_in_db.py @@ -333,12 +333,6 @@ "last_slurm_update": 1686248596.476063, "last_slurm_update_by_sacct": 1686248596.476063, } -BASE_USER_JOB_DICT = { - "user_id": "student00@mila.quebec", - "job_id": 795002, - "cluster_name": "beluga", - "labels": {f"name_{i + 1}": f"i am a label {i + 1}" for i in range(4)}, -} def _generate_huge_fake_data(with_labels=False): @@ -370,9 +364,15 @@ def _generate_huge_fake_data(with_labels=False): if with_labels: # populate labels for i in range(nb_user_job_dicts): - user_job_dict = BASE_USER_JOB_DICT.copy() - # edit job_id - user_job_dict["job_id"] = i + 1 + user_job_dict = { + "user_id": "student00@mila.quebec", + "job_id": i + 1, + "cluster_name": "beluga", + "labels": { + f"prop_{j + 1}_for_job_{i + 1}": f"I am user dict prop {j + 1} for job ID {i + 1}" + for j in range(4) + }, + } labels.append(user_job_dict) return {"users": USERS, "jobs": jobs, "labels": labels} From ebd4ddf06a6b2a431dbeff5632301f48ed55f5f3 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Mon, 26 Feb 2024 09:40:40 -0500 Subject: [PATCH 12/19] Script store_huge_fake_data_in_db: add command-line arguments --nb-jobs, --nb-dicts, --nb-props-per-dict Add scripts for benchmarking --- scripts/gen_job_request_benchmark_script.py | 51 +++++ scripts/job_request_benchmark.py | 201 ++++++++++++++++++++ scripts/plot_job_request_benchmark.py | 144 ++++++++++++++ scripts/store_huge_fake_data_in_db.py | 151 ++++++++------- 4 files changed, 480 insertions(+), 67 deletions(-) create mode 100644 scripts/gen_job_request_benchmark_script.py create mode 100644 scripts/job_request_benchmark.py create mode 100644 scripts/plot_job_request_benchmark.py diff --git a/scripts/gen_job_request_benchmark_script.py b/scripts/gen_job_request_benchmark_script.py new file mode 100644 index 00000000..b7621cd8 --- /dev/null +++ b/scripts/gen_job_request_benchmark_script.py @@ -0,0 +1,51 @@ +import sys + +# max: sum(2**i for i in range(n)) jobs +# max: sum(2**i for i in range(n)) dicts +N = 17 +Ks = (1, 10, 100) + +NB_REQUESTS = 10 + + +def main(): + if len(sys.argv) == 2: + wd = sys.argv[1] + else: + wd = "local" + + print("set -eu") + + for nb_props_per_dict in Ks: + for nb_dicts in range(N + 1): + gen_commands(N, nb_dicts, nb_props_per_dict, wd) + + for nb_jobs in range(N): + gen_commands(nb_jobs, 0, 1, wd) + + for nb_props_per_dict in Ks: + for nb_jobs in range(N): + gen_commands(nb_jobs, N, nb_props_per_dict, wd) + + +def gen_commands(nb_jobs, nb_dicts, nb_props_per_dict, working_directory): + task_name = f"jobs-{nb_jobs:02}_dicts-{nb_dicts:02}_props-{nb_props_per_dict:02}" + + cmd_fake_data = ( + f"python3 scripts/store_huge_fake_data_in_db.py 
" + f"--nb-jobs {nb_jobs} " + f"--nb-dicts {nb_dicts} " + f"--nb-props-per-dict {nb_props_per_dict}" + ) + cmd_benchmark = ( + f"python3 scripts/job_request_benchmark.py " + f"--config {working_directory}/config.json " + f"--nb-requests {NB_REQUESTS} " + f"--output {task_name}" + ) + print(cmd_fake_data) + print(cmd_benchmark) + + +if __name__ == "__main__": + main() diff --git a/scripts/job_request_benchmark.py b/scripts/job_request_benchmark.py new file mode 100644 index 00000000..b324b8e7 --- /dev/null +++ b/scripts/job_request_benchmark.py @@ -0,0 +1,201 @@ +import os + +import argparse +import sys +import logging +import time +from datetime import datetime +from collections import namedtuple +import json + +try: + from clockwork_tools.client import ClockworkToolsClient +except Exception: + print( + "Clockwork tools needed. You can install it with `cd clockwork_tools` then `pip install -e .`" + ) + raise + + +log_format = "%(levelname)s:%(name)s:%(asctime)s: %(message)s" +logging.basicConfig(level=logging.INFO, format=log_format) + +logger = logging.getLogger("server_benchmark") + + +class CallStat( + namedtuple( + "CallStat", ("username", "nb_jobs", "pt_start", "pt_end", "pc_start", "pc_end") + ) +): + """ + Class to collect stats and time for 1 request. + + Python provides 2 precision functions for profiling: + - time.process_time_ns(): only process time, does not include sleep times. + - time.perf_counter_ns(): includes sleep times. + + I made a mistake in previous commits because I measured requests using + process_time(). Thus, request times looked very small, as they don't + include sleeps, which are used to wait for server response. + + So, I decided to measure both process time and full (perf_counter) time + to check how they differ: + - process time is still very small (less than 0.10 seconds) + and correctly approximated with a linear regression wr/t nunber of jobs. + - full time (perf_counter) is very much higher, sometimes up to 10 seconds, + and way more irregular (badly approximated with linear regression). + + In practice, I guess the relevant measure is full time (with perf_counter), + as it correctly represents how much time user could wait to get response + ** if he gets all jobs at once without pagination **. 
+ """ + + @property + def pt_nanoseconds(self): + """Duration measured with process time.""" + return self.pt_end - self.pt_start + + @property + def pc_nanoseconds(self): + """Duration measured with perf counter (full duration).""" + return self.pc_end - self.pc_start + + def summary(self): + return { + "nb_jobs": self.nb_jobs, + "pc_nanoseconds": self.pc_nanoseconds, + } + + +class BenchmarkClient(ClockworkToolsClient): + """Client with a specific method for profiling.""" + + def profile_getting_user_jobs(self, username: str = None) -> CallStat: + """Profile a request `jobs/list` with given username and return a CallStat.""" + pc_start = time.perf_counter_ns() + pt_start = time.process_time_ns() + jobs = self.jobs_list(username) + pt_end = time.process_time_ns() + pc_end = time.perf_counter_ns() + return CallStat( + username=username, + nb_jobs=len(jobs), + pc_start=pc_start, + pc_end=pc_end, + pt_start=pt_start, + pt_end=pt_end, + ) + + +def main(): + argv = sys.argv + parser = argparse.ArgumentParser( + prog=argv[0], + formatter_class=argparse.RawTextHelpFormatter, + ) + parser.add_argument("-a", "--address", help="Server host.") + parser.add_argument("-p", "--port", type=int, default=443, help="Server port.") + parser.add_argument( + "--config", + type=str, + help=( + "Optional JSON configuration file to use for benchmarking. " + "If not specified, use --address, --port, and OS environment variables for clockwork api key and email. " + "If file exists, ignore --address, --port and OS variables, and read config from file. " + "If file does not exist, create file with config values from --address, --port and OS variables. " + "Configuration file must contain a dictionary with keys " + "'address' (str), 'port` (int), 'api_key` (str), 'email' (str)." + ), + ) + parser.add_argument( + "-n", + "--nb-requests", + type=int, + default=10, + help="Number of requests to send (default, 10).", + ) + parser.add_argument( + "-o", + "--output", + type=str, + required=True, + help="Benchmark name, used to save stats on disk. " + "Saved in /.json", + ) + args = parser.parse_args(argv[1:]) + print("Arguments:", args) + + if args.nb_requests < 1: + logger.error(f"No positive time specified for benchmarking, exit.") + sys.exit(1) + + bench_date = datetime.now() + config_path = None + working_directory = "." + if args.config: + config_path = os.path.abspath(args.config) + working_directory = os.path.dirname(config_path) + # Save next log messages into a file. + log_formatter = logging.Formatter(log_format) + log_path = os.path.join(working_directory, f"bench_{args.output}.log") + logger.info(f"Saving log in: {log_path}") + file_handler = logging.FileHandler(log_path) + file_handler.setFormatter(log_formatter) + logger.addHandler(file_handler) + + if config_path and os.path.isfile(config_path): + # Read config file if available. + with open(config_path) as file: + config = json.load(file) + address = config["address"] + port = config["port"] + api_key = config["api_key"] + email = config["email"] + logger.info(f"Loaded config from file: address: {address}, port: {port}") + else: + address = args.address + port = args.port + # API key and email will be retrieved from OS environment in client constructor. + api_key = None + email = None + if not address: + logger.error( + "Either --address or --config (with existing file) is required." 
+ ) + sys.exit(1) + + client = BenchmarkClient( + host=address, port=port, clockwork_api_key=api_key, email=email + ) + + output = [] + for i in range(args.nb_requests): + cs = client.profile_getting_user_jobs() + logger.info( + f"[{i + 1}] Sent request for username in {cs.pc_nanoseconds / 1e9} seconds, " + f"received {cs.nb_jobs} jobs." + ) + output.append(cs.summary()) + + if config_path and not os.path.exists(config_path): + # If args.config is defined, we save config file if args.config does not exist. + config = { + "address": client.host, + "port": client.port, + "api_key": client.clockwork_api_key, + "email": client.email, + } + with open(config_path, "w") as file: + json.dump(config, file) + logger.info(f"Saved config file at: {config_path}") + + output_path = os.path.join(working_directory, f"{args.output}.json") + with open(output_path, "w") as file: + json.dump(output, file) + logger.info(f"Saved stats at: {output_path}") + logger.info("End.") + + +if __name__ == "__main__": + main() diff --git a/scripts/plot_job_request_benchmark.py b/scripts/plot_job_request_benchmark.py new file mode 100644 index 00000000..766751b4 --- /dev/null +++ b/scripts/plot_job_request_benchmark.py @@ -0,0 +1,144 @@ +import os +import sys +import json + + +try: + import matplotlib.pyplot as plt + # plt.figure(figure=(10.8, 7.2), dpi=100) +except Exception: + print( + "Matplotlib needed. You can install it with `pip install matplotlib`", + file=sys.stderr, + ) + raise + + +def main(): + if len(sys.argv) != 2: + print("Missing stats folder", file=sys.stderr) + sys.exit(1) + + # Get stat files. + folder = sys.argv[1] + stats_file_names = [] + for name in os.listdir(folder): + if name.startswith("jobs-") and name.endswith(".json"): + stats_file_names.append(name) + + # Get stat data. 
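+    # File names follow the generator script's naming scheme,
+    # e.g. "jobs-17_dicts-00_props-01.json"; the three counts are
+    # parsed back out of the name below.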
+ stats = {} + nbs_jobs = [] + nbs_dicts = [] + nbs_props = [] + for name in sorted(stats_file_names): + title, extension = name.split(".") + jobs_info, dicts_info, props_info = title.split("_") + _, nb_jobs = jobs_info.split("-") + _, nb_dicts = dicts_info.split("-") + _, nb_props_per_dict = props_info.split("-") + nb_jobs = int(nb_jobs) + nb_dicts = int(nb_dicts) + nb_props_per_dict = int(nb_props_per_dict) + with open(os.path.join(folder, name)) as file: + local_stats = json.load(file) + assert len({stat["nb_jobs"] for stat in local_stats}) == 1 + durations = sorted(stat["pc_nanoseconds"] for stat in local_stats) + stats[(nb_jobs, nb_dicts, nb_props_per_dict)] = durations + nbs_jobs.append(nb_jobs) + nbs_dicts.append(nb_dicts) + nbs_props.append(nb_props_per_dict) + + assert max(nbs_jobs) == max(nbs_dicts) + N = max(nbs_jobs) + Ks = sorted(set(nbs_props)) + + _plot_request_time_per_nb_dicts(stats, N, Ks, folder) + _plots_request_time_per_nb_jobs(stats, N, Ks, folder) + + +def _plot_request_time_per_nb_dicts(stats: dict, N: int, Ks: list, folder: str): + x_nb_dicts = [_compute_nb_jobs(n) for n in range(N + 1)] + y_time = {nb_props: [] for nb_props in Ks} + + for nb_props in Ks: + print() + for nb_dicts in range(N + 1): + key = (N, nb_dicts, nb_props) + average_duration = _debug_average_seconds(key, stats[key]) + y_time[nb_props].append(average_duration) + + fig, ax = plt.subplots() + for nb_props in Ks: + ax.plot( + x_nb_dicts, + y_time[nb_props], + marker='o', + label=f"{_compute_nb_jobs(N)} jobs in DB, {nb_props} prop(s) per dict", + ) + ax.set_title("Request duration per number of job-user dicts") + ax.set_xlabel("Number of job-user dicts in DB") + ax.set_ylabel("Request duration in seconds") + ax.legend() + plot_path = os.path.join( + folder, + f"nb_dicts_to_time_for_{_compute_nb_jobs(N)}_jobs.jpg", + ) + plt.gcf().set_size_inches(20, 10) + plt.savefig(plot_path, bbox_inches="tight") + plt.close(fig) + + +def _plots_request_time_per_nb_jobs(stats: dict, N: int, Ks: list, folder: str): + x_nb_jobs = [_compute_nb_jobs(n) for n in range(N + 1)] + y_time_0_dicts_1_props = [] + y_time_N_dicts = {nb_props: [] for nb_props in Ks} + + print() + for nb_jobs in range(N + 1): + key = (nb_jobs, 0, 1) + average_duration = _debug_average_seconds(key, stats[key]) + y_time_0_dicts_1_props.append(average_duration) + print() + for nb_props in Ks: + for nb_jobs in range(N + 1): + key = (nb_jobs, N, nb_props) + average_duration = _debug_average_seconds(key, stats[key]) + y_time_N_dicts[nb_props].append(average_duration) + + fig, ax = plt.subplots() + ax.plot(x_nb_jobs, y_time_0_dicts_1_props, marker='o', label=f"0 job-user dicts in DB") + for nb_props in Ks: + ax.plot( + x_nb_jobs, + y_time_N_dicts[nb_props], + marker='o', + label=f"{_compute_nb_jobs(N)} job-user dicts in DB, {nb_props} props per dict", + ) + ax.set_title("Request duration per number of jobs") + ax.set_xlabel("Number of jobs in DB") + ax.set_ylabel("Request duration in seconds") + ax.legend() + plot_path = os.path.join(folder, f"nb_jobs_to_time.jpg") + plt.gcf().set_size_inches(20, 10) + plt.savefig(plot_path, bbox_inches="tight") + plt.close(fig) + + +def _compute_nb_jobs(n: int): + return sum(2**i for i in range(n)) + + +def _debug_average_seconds(key, durations): + nb_jobs, nb_dicts, nb_props = key + avg = sum(durations) / (len(durations) * 1e9) + print( + f"jobs {nb_jobs:02} dicts {nb_dicts:02} props {nb_props:02}", + avg, + [d / 1e9 for d in durations], + ) + return avg + + +if __name__ == "__main__": + main() diff --git 
a/scripts/store_huge_fake_data_in_db.py b/scripts/store_huge_fake_data_in_db.py index 2b901a65..5f7f4c7d 100644 --- a/scripts/store_huge_fake_data_in_db.py +++ b/scripts/store_huge_fake_data_in_db.py @@ -335,52 +335,65 @@ } -def _generate_huge_fake_data(with_labels=False): - nb_jobs_per_user = [2**i for i in range(len(USERS))] - assert len(nb_jobs_per_user) == len(USERS) - nb_user_job_dicts = sum(nb_jobs_per_user) +DEFAULT_NB_JOBS = len(USERS) +DEFAULT_NB_DICTS = DEFAULT_NB_JOBS +DEFAULT_NB_PROPS_PER_DICT = 4 + + +def _generate_huge_fake_data( + nb_jobs=DEFAULT_NB_JOBS, + nb_dicts=DEFAULT_NB_DICTS, + nb_props_per_dict=DEFAULT_NB_PROPS_PER_DICT, +): jobs = [] - labels = [] + job_user_dicts = [] # populate jobs - job_id = 0 - for user, nb_user_jobs in zip(USERS, nb_jobs_per_user): - for i in range(nb_user_jobs): - job_id += 1 - job_slurm = BASE_JOB_SLURM.copy() - job_cw = BASE_JOB_CW.copy() - # edit slurm.job_id - job_slurm["job_id"] = str(job_id) - # edit slurm.name - job_slurm["name"] = f"job_name_{job_id}" - # edit slurm.username - job_slurm["username"] = user["cc_account_username"] - # edit cw.mila_email_username - job_cw["mila_email_username"] = user["mila_email_username"] - jobs.append({"slurm": job_slurm, "cw": job_cw, "user": {}}) - print("Nb. jobs:", job_id) - assert job_id == nb_user_job_dicts - - if with_labels: - # populate labels - for i in range(nb_user_job_dicts): + if nb_jobs: + assert 1 <= nb_jobs <= len(USERS) + nb_jobs_per_user = [2**i for i in range(nb_jobs)] + assert len(nb_jobs_per_user) == nb_jobs + job_id = 0 + for user, nb_user_jobs in zip(USERS[:nb_jobs], nb_jobs_per_user): + for i in range(nb_user_jobs): + job_id += 1 + job_slurm = BASE_JOB_SLURM.copy() + job_cw = BASE_JOB_CW.copy() + # edit slurm.job_id + job_slurm["job_id"] = str(job_id) + # edit slurm.name + job_slurm["name"] = f"job_name_{job_id}" + # edit slurm.username + job_slurm["username"] = user["cc_account_username"] + # edit cw.mila_email_username + job_cw["mila_email_username"] = user["mila_email_username"] + jobs.append({"slurm": job_slurm, "cw": job_cw, "user": {}}) + print("Nb. jobs:", job_id) + assert job_id == sum(nb_jobs_per_user) + + # populate job-user-dicts + if nb_dicts: + real_nb_dicts = sum(2**i for i in range(nb_dicts)) + for i in range(real_nb_dicts): user_job_dict = { "user_id": "student00@mila.quebec", "job_id": i + 1, "cluster_name": "beluga", "labels": { f"prop_{j + 1}_for_job_{i + 1}": f"I am user dict prop {j + 1} for job ID {i + 1}" - for j in range(4) + for j in range(nb_props_per_dict) }, } - labels.append(user_job_dict) + job_user_dicts.append(user_job_dict) + print("Nb. dicts:", real_nb_dicts) + print("NB. props per dict:", nb_props_per_dict) - return {"users": USERS, "jobs": jobs, "labels": labels} + return {"users": USERS, "jobs": jobs, "labels": job_user_dicts} -def populate_fake_data(db_insertion_point, labels=False): +def populate_fake_data(db_insertion_point, **kwargs): print("Generating huge fake data") - E = _generate_huge_fake_data(with_labels=labels) + E = _generate_huge_fake_data(**kwargs) print("Generated huge fake data") # Create indices. 
This isn't half as important as when we're @@ -404,9 +417,14 @@ def populate_fake_data(db_insertion_point, labels=False): ) for k in ["users", "jobs", "nodes", "gpu", "labels"]: + # Anyway clean before inserting + db_insertion_point[k].delete_many({}) if k in E and E[k]: print("Inserting", k) + # Then insert db_insertion_point[k].insert_many(E[k]) + # And check count + assert db_insertion_point[k].count_documents({}) == len(E[k]) print("Inserted", k) def cleanup_function(): @@ -451,48 +469,43 @@ def cleanup_function(): return cleanup_function -def store_data_in_db(labels=False): +def store_data_in_db(**kwargs): # Open the database and insert the contents. client = get_mongo_client() - populate_fake_data(client[get_config("mongo.database_name")], labels=labels) - - -def modify_timestamps(data): - """ - This function updates the timestamps in order to simulate jobs which have - been launched more recently than they were. - """ - # Retrieve the most recent timestamp (ie its end_time) - most_recent_timestamp = data["jobs"][0]["slurm"]["end_time"] - # most_recent_timestamp = min(job["slurm"]["end_time"] for job in data["jobs"]) - for job in data["jobs"]: - new_end_time = job["slurm"]["end_time"] - if new_end_time: - if new_end_time > most_recent_timestamp: - most_recent_timestamp = new_end_time - - # Retrieve the time interval between this timestamp and now - time_delta = datetime.now().timestamp() - most_recent_timestamp - - # Substract it to the timestamps of the jobs - for job in data["jobs"]: - if job["slurm"]["submit_time"]: - job["slurm"]["submit_time"] += time_delta - if job["slurm"]["start_time"]: - job["slurm"]["start_time"] += time_delta - if job["slurm"]["end_time"]: - job["slurm"]["end_time"] += time_delta + populate_fake_data(client[get_config("mongo.database_name")], **kwargs) def main(argv): # Retrieve the arguments passed to the script - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--nb-jobs", + type=int, + default=DEFAULT_NB_JOBS, + help="Number of users for which to add jobs. " + "Control the number of jobs in database by generating " + "2**i jobs for each user i from user <0> to user . " + "If 0, no jobs are added. " + f"Default is {DEFAULT_NB_JOBS}, for all users available, ie. " + f"{sum(2**i for i in range(DEFAULT_NB_JOBS))} total jobs.", + ) + parser.add_argument( + "--nb-dicts", + type=int, + default=DEFAULT_NB_DICTS, + help="Control the number of job-user dicts in database by generating " + "sum(2**i for i in range(nb-dicts)) dictionaries. " + "If 0, no dicts are added. " + f"Default is {DEFAULT_NB_DICTS} to match the maximum number of potential jobs, ie. " + f"{sum(2**i for i in range(DEFAULT_NB_DICTS))} total dicts.", + ) parser.add_argument( - "--labels", - type=bool, - action=argparse.BooleanOptionalAction, - default=False, - help="Add fake job-user dicts. 
If False (default), only add jobs.", + "--nb-props-per-dict", + type=int, + default=DEFAULT_NB_PROPS_PER_DICT, + help=f"Number of key-value pairs in each job-user dict.", ) args = parser.parse_args(argv[1:]) print(args) @@ -502,7 +515,11 @@ def main(argv): register_config("mongo.database_name", "clockwork") # Store the generated fake data in the database - store_data_in_db(labels=args.labels) + store_data_in_db( + nb_jobs=args.nb_jobs, + nb_dicts=args.nb_dicts, + nb_props_per_dict=args.nb_props_per_dict, + ) if __name__ == "__main__": From b90da413681141d4affb8d3b8d7ca7d04020e6c9 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Mon, 26 Feb 2024 19:23:16 -0500 Subject: [PATCH 13/19] Allow to pass raw number of jobs or dicts to benchmark scripts. --- scripts/gen_job_request_benchmark_script.py | 26 ++-- scripts/plot_job_request_benchmark.py | 48 +++++--- scripts/store_huge_fake_data_in_db.py | 127 ++++++-------------- 3 files changed, 85 insertions(+), 116 deletions(-) diff --git a/scripts/gen_job_request_benchmark_script.py b/scripts/gen_job_request_benchmark_script.py index b7621cd8..21aa7f00 100644 --- a/scripts/gen_job_request_benchmark_script.py +++ b/scripts/gen_job_request_benchmark_script.py @@ -1,9 +1,9 @@ import sys -# max: sum(2**i for i in range(n)) jobs -# max: sum(2**i for i in range(n)) dicts -N = 17 -Ks = (1, 10, 100) +# Ns = [i * 10_000 for i in range(16)] +Ns = [i * 10_000 for i in range(11)] +Ks = (1, 500) +N = Ns[-1] NB_REQUESTS = 10 @@ -17,19 +17,19 @@ def main(): print("set -eu") for nb_props_per_dict in Ks: - for nb_dicts in range(N + 1): + for nb_dicts in Ns: gen_commands(N, nb_dicts, nb_props_per_dict, wd) - for nb_jobs in range(N): + for nb_jobs in Ns[:-1]: gen_commands(nb_jobs, 0, 1, wd) for nb_props_per_dict in Ks: - for nb_jobs in range(N): + for nb_jobs in Ns[:-1]: gen_commands(nb_jobs, N, nb_props_per_dict, wd) def gen_commands(nb_jobs, nb_dicts, nb_props_per_dict, working_directory): - task_name = f"jobs-{nb_jobs:02}_dicts-{nb_dicts:02}_props-{nb_props_per_dict:02}" + task_name = f"jobs-{nb_jobs:06}_dicts-{nb_dicts:06}_props-{nb_props_per_dict:03}" cmd_fake_data = ( f"python3 scripts/store_huge_fake_data_in_db.py " @@ -43,8 +43,18 @@ def gen_commands(nb_jobs, nb_dicts, nb_props_per_dict, working_directory): f"--nb-requests {NB_REQUESTS} " f"--output {task_name}" ) + print(cmd_fake_data) + print('python3 -m flask run --host="0.0.0.0" &') + print("export SERVER_PID=$!") + print("sleep 1") + print( + '''python3 -c "import urllib.request; print(urllib.request.urlopen('http://127.0.0.1:5000/').getcode())"''' + ) print(cmd_benchmark) + print("kill $SERVER_PID") + print("export SERVER_PID=") + print() if __name__ == "__main__": diff --git a/scripts/plot_job_request_benchmark.py b/scripts/plot_job_request_benchmark.py index 766751b4..551d1e39 100644 --- a/scripts/plot_job_request_benchmark.py +++ b/scripts/plot_job_request_benchmark.py @@ -5,6 +5,7 @@ try: import matplotlib.pyplot as plt + # plt.figure(figure=(10.8, 7.2), dpi=100) except Exception: print( @@ -49,21 +50,23 @@ def main(): nbs_dicts.append(nb_dicts) nbs_props.append(nb_props_per_dict) - assert max(nbs_jobs) == max(nbs_dicts) - N = max(nbs_jobs) + assert sorted(set(nbs_jobs)) == sorted(set(nbs_dicts)) + Ns = sorted(set(nbs_jobs)) Ks = sorted(set(nbs_props)) - _plot_request_time_per_nb_dicts(stats, N, Ks, folder) - _plots_request_time_per_nb_jobs(stats, N, Ks, folder) + _plot_request_time_per_nb_dicts(stats, Ns, Ks, folder) + _plots_request_time_per_nb_jobs(stats, Ns, Ks, folder) + +def 
_plot_request_time_per_nb_dicts(stats: dict, Ns: list, Ks: list, folder: str): + N = max(Ns) -def _plot_request_time_per_nb_dicts(stats: dict, N: int, Ks: list, folder: str): - x_nb_dicts = [_compute_nb_jobs(n) for n in range(N + 1)] + x_nb_dicts = list(Ns) y_time = {nb_props: [] for nb_props in Ks} for nb_props in Ks: print() - for nb_dicts in range(N + 1): + for nb_dicts in Ns: key = (N, nb_dicts, nb_props) average_duration = _debug_average_seconds(key, stats[key]) y_time[nb_props].append(average_duration) @@ -73,9 +76,11 @@ def _plot_request_time_per_nb_dicts(stats: dict, N: int, Ks: list, folder: str): ax.plot( x_nb_dicts, y_time[nb_props], - marker='o', + marker="o", label=f"{_compute_nb_jobs(N)} jobs in DB, {nb_props} prop(s) per dict", ) + _show_points(x_nb_dicts, y_time[nb_props]) + ax.set_title("Request duration per number of job-user dicts") ax.set_xlabel("Number of job-user dicts in DB") ax.set_ylabel("Request duration in seconds") @@ -89,32 +94,39 @@ def _plot_request_time_per_nb_dicts(stats: dict, N: int, Ks: list, folder: str): plt.close(fig) -def _plots_request_time_per_nb_jobs(stats: dict, N: int, Ks: list, folder: str): - x_nb_jobs = [_compute_nb_jobs(n) for n in range(N + 1)] +def _plots_request_time_per_nb_jobs(stats: dict, Ns: list, Ks: list, folder: str): + x_nb_jobs = list(Ns) y_time_0_dicts_1_props = [] y_time_N_dicts = {nb_props: [] for nb_props in Ks} + N = max(Ns) print() - for nb_jobs in range(N + 1): + for nb_jobs in Ns: key = (nb_jobs, 0, 1) average_duration = _debug_average_seconds(key, stats[key]) y_time_0_dicts_1_props.append(average_duration) print() for nb_props in Ks: - for nb_jobs in range(N + 1): + for nb_jobs in Ns: key = (nb_jobs, N, nb_props) average_duration = _debug_average_seconds(key, stats[key]) y_time_N_dicts[nb_props].append(average_duration) fig, ax = plt.subplots() - ax.plot(x_nb_jobs, y_time_0_dicts_1_props, marker='o', label=f"0 job-user dicts in DB") + ax.plot( + x_nb_jobs, y_time_0_dicts_1_props, marker="o", label=f"0 job-user dicts in DB" + ) + _show_points(x_nb_jobs, y_time_0_dicts_1_props) + for nb_props in Ks: ax.plot( x_nb_jobs, y_time_N_dicts[nb_props], - marker='o', + marker="o", label=f"{_compute_nb_jobs(N)} job-user dicts in DB, {nb_props} props per dict", ) + _show_points(x_nb_jobs, y_time_N_dicts[nb_props]) + ax.set_title("Request duration per number of jobs") ax.set_xlabel("Number of jobs in DB") ax.set_ylabel("Request duration in seconds") @@ -126,7 +138,13 @@ def _plots_request_time_per_nb_jobs(stats: dict, N: int, Ks: list, folder: str): def _compute_nb_jobs(n: int): - return sum(2**i for i in range(n)) + return n + + +def _show_points(xs, ys): + # return + for x, y in zip(xs, ys): + plt.text(x, y, f"({x}, {round(y, 2)})") def _debug_average_seconds(key, durations): diff --git a/scripts/store_huge_fake_data_in_db.py b/scripts/store_huge_fake_data_in_db.py index 5f7f4c7d..e5cd698c 100644 --- a/scripts/store_huge_fake_data_in_db.py +++ b/scripts/store_huge_fake_data_in_db.py @@ -335,7 +335,7 @@ } -DEFAULT_NB_JOBS = len(USERS) +DEFAULT_NB_JOBS = 1_000_000 DEFAULT_NB_DICTS = DEFAULT_NB_JOBS DEFAULT_NB_PROPS_PER_DICT = 4 @@ -349,45 +349,37 @@ def _generate_huge_fake_data( job_user_dicts = [] # populate jobs - if nb_jobs: - assert 1 <= nb_jobs <= len(USERS) - nb_jobs_per_user = [2**i for i in range(nb_jobs)] - assert len(nb_jobs_per_user) == nb_jobs - job_id = 0 - for user, nb_user_jobs in zip(USERS[:nb_jobs], nb_jobs_per_user): - for i in range(nb_user_jobs): - job_id += 1 - job_slurm = BASE_JOB_SLURM.copy() - job_cw = 
BASE_JOB_CW.copy() - # edit slurm.job_id - job_slurm["job_id"] = str(job_id) - # edit slurm.name - job_slurm["name"] = f"job_name_{job_id}" - # edit slurm.username - job_slurm["username"] = user["cc_account_username"] - # edit cw.mila_email_username - job_cw["mila_email_username"] = user["mila_email_username"] - jobs.append({"slurm": job_slurm, "cw": job_cw, "user": {}}) - print("Nb. jobs:", job_id) - assert job_id == sum(nb_jobs_per_user) + for i in range(nb_jobs): + user = USERS[i % len(USERS)] + job_id = i + 1 + job_slurm = BASE_JOB_SLURM.copy() + job_cw = BASE_JOB_CW.copy() + # edit slurm.job_id + job_slurm["job_id"] = str(job_id) + # edit slurm.name + job_slurm["name"] = f"job_name_{job_id}" + # edit slurm.username + job_slurm["username"] = user["cc_account_username"] + # edit cw.mila_email_username + job_cw["mila_email_username"] = user["mila_email_username"] + jobs.append({"slurm": job_slurm, "cw": job_cw, "user": {}}) # populate job-user-dicts - if nb_dicts: - real_nb_dicts = sum(2**i for i in range(nb_dicts)) - for i in range(real_nb_dicts): - user_job_dict = { - "user_id": "student00@mila.quebec", - "job_id": i + 1, - "cluster_name": "beluga", - "labels": { - f"prop_{j + 1}_for_job_{i + 1}": f"I am user dict prop {j + 1} for job ID {i + 1}" - for j in range(nb_props_per_dict) - }, - } - job_user_dicts.append(user_job_dict) - print("Nb. dicts:", real_nb_dicts) - print("NB. props per dict:", nb_props_per_dict) - + for i in range(nb_dicts): + user_job_dict = { + "user_id": "student00@mila.quebec", + "job_id": i + 1, + "cluster_name": "beluga", + "labels": { + f"prop_{j + 1}_for_job_{i + 1}": f"I am user dict prop {j + 1} for job ID {i + 1}" + for j in range(nb_props_per_dict) + }, + } + job_user_dicts.append(user_job_dict) + + print( + f"Jobs: {len(jobs)}, dicts: {len(job_user_dicts)}, props per dict: {nb_props_per_dict}" + ) return {"users": USERS, "jobs": jobs, "labels": job_user_dicts} @@ -420,54 +412,12 @@ def populate_fake_data(db_insertion_point, **kwargs): # Anyway clean before inserting db_insertion_point[k].delete_many({}) if k in E and E[k]: - print("Inserting", k) - # Then insert + print(f"Inserting {k}, {len(E[k])} value(s)") db_insertion_point[k].insert_many(E[k]) - # And check count + # Check count assert db_insertion_point[k].count_documents({}) == len(E[k]) print("Inserted", k) - def cleanup_function(): - """ - Each of those kinds of data is identified in a unique way, - and we can use that identifier to clean up. - - For example, when clearing out jobs, we can look at the "job_id" - of the entries that we inserted. - - The point is that we can run a test against the production mongodb on Atlas - and not affect the real data. If we cleared the tables completely, - then we'd be affecting the real data in a bad way. - """ - for e in E["users"]: - db_insertion_point["users"].delete_many( - {"mila_email_username": e["mila_email_username"]} - ) - - for e in E["gpu"]: - db_insertion_point["gpu"].delete_many({"name": e["name"]}) - - for e in E["labels"]: - copy_e = e - copy_e.pop("labels") - db_insertion_point["labels"].delete_many(copy_e) - - for (k, sub, id_field) in [ - ("jobs", "slurm", "job_id"), - ("nodes", "slurm", "name"), - ]: - if k in E: - for e in E[k]: - # This is complicated, but it's just about a way to say something like - # that we want to remove {"slurm.job_id", e["slurm"]["job_id"]}, - # and the weird notation comes from the fact that mongodb filters use dots, - # but not the original python. 
- db_insertion_point[k].delete_many( - {f"{sub}.{id_field}": e[sub][id_field]} - ) - - return cleanup_function - def store_data_in_db(**kwargs): # Open the database and insert the contents. @@ -484,22 +434,13 @@ def main(argv): "--nb-jobs", type=int, default=DEFAULT_NB_JOBS, - help="Number of users for which to add jobs. " - "Control the number of jobs in database by generating " - "2**i jobs for each user i from user <0> to user . " - "If 0, no jobs are added. " - f"Default is {DEFAULT_NB_JOBS}, for all users available, ie. " - f"{sum(2**i for i in range(DEFAULT_NB_JOBS))} total jobs.", + help="Number of jobs to add. May be 0 (no job added).", ) parser.add_argument( "--nb-dicts", type=int, default=DEFAULT_NB_DICTS, - help="Control the number of job-user dicts in database by generating " - "sum(2**i for i in range(nb-dicts)) dictionaries. " - "If 0, no dicts are added. " - f"Default is {DEFAULT_NB_DICTS} to match the maximum number of potential jobs, ie. " - f"{sum(2**i for i in range(DEFAULT_NB_DICTS))} total dicts.", + help="Number of job-user dicts to add. May be 0 (no job added).", ) parser.add_argument( "--nb-props-per-dict", From bb0e6c13e5f9595e68e4a0b32ebc9b1b9a8a26ec Mon Sep 17 00:00:00 2001 From: notoraptor Date: Thu, 29 Feb 2024 13:54:35 -0500 Subject: [PATCH 14/19] Update --- scripts/gen_benchmark_script_students.py | 66 ++++++++++++ scripts/job_request_benchmark.py | 28 ++++- scripts/plot_benchmark_students.py | 124 +++++++++++++++++++++++ scripts/store_huge_fake_data_in_db.py | 109 ++++++++++++++++---- 4 files changed, 303 insertions(+), 24 deletions(-) create mode 100644 scripts/gen_benchmark_script_students.py create mode 100644 scripts/plot_benchmark_students.py diff --git a/scripts/gen_benchmark_script_students.py b/scripts/gen_benchmark_script_students.py new file mode 100644 index 00000000..26535e50 --- /dev/null +++ b/scripts/gen_benchmark_script_students.py @@ -0,0 +1,66 @@ +import sys +import os + +SIZES_STUDENT00 = [0, 10_000, 100_000, 1_000_000, 2_000_000] +SIZES_STUDENT01 = list(range(0, 101, 20)) +NB_PROPS_PER_DICT = 4 + +NB_REQUESTS = 10 + + +def main(): + if len(sys.argv) != 2: + print("Missing output folder name", file=sys.stderr) + exit(1) + + wd = sys.argv[1] + if not os.path.exists(wd): + os.mkdir(wd) + + print("set -eu") + print("export CLOCKWORK_API_KEY='000aaa01'") + print("export CLOCKWORK_EMAIL='student01@mila.quebec'") + print() + + for std_00 in SIZES_STUDENT00: + for std_01 in SIZES_STUDENT01: + gen_commands(std_00, std_01, wd) + + +def gen_commands(nb_jobs_student00, nb_jobs_student01, working_directory): + task_name = f"student00-{nb_jobs_student00:06}_student01-{nb_jobs_student01:06}" + nb_dicts = nb_jobs_student00 + nb_jobs_student01 + nb_props_per_dict = NB_PROPS_PER_DICT + + cmd_fake_data = ( + f"python3 scripts/store_huge_fake_data_in_db.py " + f"-j student00={nb_jobs_student00} " + f"-j student01={nb_jobs_student01} " + f"--nb-dicts {nb_dicts} " + f"--nb-props-per-dict {nb_props_per_dict}" + ) + cmd_benchmark = ( + f"python3 scripts/job_request_benchmark.py " + f"-w {working_directory} " + f'--address "0.0.0.0" ' + f"--port 5000 " + f'--username "student01@mila.quebec" ' + f"--nb-requests {NB_REQUESTS} " + f"--output {task_name}" + ) + + print(cmd_fake_data) + print('python3 -m flask run --host="0.0.0.0" &') + print("export SERVER_PID=$!") + print("sleep 1") + print( + '''python3 -c "import urllib.request; print(urllib.request.urlopen('http://127.0.0.1:5000/').getcode())"''' + ) + print(cmd_benchmark) + print("kill $SERVER_PID") + 
print("export SERVER_PID=") + print() + + +if __name__ == "__main__": + main() diff --git a/scripts/job_request_benchmark.py b/scripts/job_request_benchmark.py index b324b8e7..cb293c58 100644 --- a/scripts/job_request_benchmark.py +++ b/scripts/job_request_benchmark.py @@ -4,7 +4,6 @@ import sys import logging import time -from datetime import datetime from collections import namedtuple import json @@ -108,6 +107,28 @@ def main(): "'address' (str), 'port` (int), 'api_key` (str), 'email' (str)." ), ) + parser.add_argument( + "-w", + "--working-directory", + type=str, + default=".", + help=( + "Working directory. " + "Default is '.'. " + "If `--config` specified, `--working-directory` is ignored " + "and working directory is config folder." + ), + ) + parser.add_argument( + "-u", + "--username", + type=str, + help=( + "Optional email of specific username for which we want to search jobs. " + "By default, no username is specified, and all jobs visible by logged user " + "(using client email an api key) are retrieved." + ), + ) parser.add_argument( "-n", "--nb-requests", @@ -130,9 +151,8 @@ def main(): logger.error(f"No positive time specified for benchmarking, exit.") sys.exit(1) - bench_date = datetime.now() config_path = None - working_directory = "." + working_directory = args.working_directory if args.config: config_path = os.path.abspath(args.config) working_directory = os.path.dirname(config_path) @@ -171,7 +191,7 @@ def main(): output = [] for i in range(args.nb_requests): - cs = client.profile_getting_user_jobs() + cs = client.profile_getting_user_jobs(username=args.username) logger.info( f"[{i + 1}] Sent request for username in {cs.pc_nanoseconds / 1e9} seconds, " f"received {cs.nb_jobs} jobs." diff --git a/scripts/plot_benchmark_students.py b/scripts/plot_benchmark_students.py new file mode 100644 index 00000000..5ae29b2b --- /dev/null +++ b/scripts/plot_benchmark_students.py @@ -0,0 +1,124 @@ +import os +import sys +import json + + +try: + import matplotlib.pyplot as plt + from matplotlib import colors + + # plt.figure(figure=(10.8, 7.2), dpi=100) +except Exception: + print( + "Matplotlib needed. You can install it with `pip install matplotlib`", + file=sys.stderr, + ) + raise + + +def main(): + if len(sys.argv) != 2: + print("Missing stats folder", file=sys.stderr) + sys.exit(1) + + # Get stat files. + folder = sys.argv[1] + stats_file_names = [] + for name in os.listdir(folder): + if name.startswith("student00-") and name.endswith(".json"): + stats_file_names.append(name) + + # Get stat data. 
+ stats = {} + for name in sorted(stats_file_names): + title, extension = name.split(".") + info_student00, info_student01 = title.split("_") + _, nb_jobs_student00 = info_student00.split("-") + _, nb_jobs_student01 = info_student01.split("-") + nb_jobs_student00 = int(nb_jobs_student00) + nb_jobs_student01 = int(nb_jobs_student01) + + with open(os.path.join(folder, name)) as file: + local_stats = json.load(file) + nbs_jobs = {stat["nb_jobs"] for stat in local_stats} + assert len(nbs_jobs) == 1 + assert next(iter(nbs_jobs)) == nb_jobs_student01 + durations = sorted(stat["pc_nanoseconds"] for stat in local_stats) + stats[(nb_jobs_student00, nb_jobs_student01)] = durations + + _plots_request_time_per_nb_jobs(stats, folder) + + +def _plots_request_time_per_nb_jobs(stats: dict, folder: str): + cdict = { + "red": ( + (0.0, 0.0, 0.0), + # (1.0, 0.5, 0.5), + (1.0, 1.0, 0.0), + ), + "green": ( + (0.0, 0.0, 1.0), + # (1.0, 0.5, 0.5), + (1.0, 0.0, 0.0), + ), + "blue": ( + (0.0, 0.0, 0.0), + # (1.0, 0.0, 0.0), + (1.0, 0.0, 0.0), + ), + } + + cmap = colors.LinearSegmentedColormap("custom", cdict) + + student00_to_plot = {} + for (student00, student01), durations in stats.items(): + average_duration = _debug_average_seconds((student00, student01), durations) + student00_to_plot.setdefault(student00, []).append( + (student01, average_duration) + ) + + fig, ax = plt.subplots() + n = len(student00_to_plot) - 1 + for i, student00 in enumerate(sorted(student00_to_plot.keys())): + local_data = student00_to_plot[student00] + xs = [couple[0] for couple in local_data] + ys = [couple[1] for couple in local_data] + print(cmap(i / n)) + ax.plot( + xs, + ys, + marker="o", + label=f"student00: {student00} jobs", + c=cmap(i / n), + ) + # _show_points(xs, ys) + + ax.set_title("Request duration per number of jobs for student01") + ax.set_xlabel("Number of student01's jobs in DB") + ax.set_ylabel("Request duration in seconds") + ax.legend() + plot_path = os.path.join(folder, f"nb_student01_jobs_to_time.jpg") + plt.gcf().set_size_inches(20, 10) + plt.savefig(plot_path, bbox_inches="tight") + plt.close(fig) + + +def _show_points(xs, ys): + # return + for x, y in zip(xs, ys): + plt.text(x, y, f"({x}, {round(y, 2)})") + + +def _debug_average_seconds(key, durations): + sdt00, std01 = key + avg = sum(durations) / (len(durations) * 1e9) + print( + f"student00 {sdt00:02} student01 {std01:02}", + avg, + [d / 1e9 for d in durations], + ) + return avg + + +if __name__ == "__main__": + main() diff --git a/scripts/store_huge_fake_data_in_db.py b/scripts/store_huge_fake_data_in_db.py index e5cd698c..28e8551e 100644 --- a/scripts/store_huge_fake_data_in_db.py +++ b/scripts/store_huge_fake_data_in_db.py @@ -342,32 +342,91 @@ def _generate_huge_fake_data( nb_jobs=DEFAULT_NB_JOBS, + nb_student_jobs=None, nb_dicts=DEFAULT_NB_DICTS, nb_props_per_dict=DEFAULT_NB_PROPS_PER_DICT, ): + student_to_nb_jobs = [] + if nb_student_jobs is not None: + for desc in nb_student_jobs: + student_name, str_nb_student_jobs = desc.split("=") + nb_student_jobs = int(str_nb_student_jobs.strip()) + student_to_nb_jobs.append((student_name.strip(), nb_student_jobs)) + else: + assert nb_jobs >= 0 + jobs = [] - job_user_dicts = [] # populate jobs - for i in range(nb_jobs): - user = USERS[i % len(USERS)] - job_id = i + 1 - job_slurm = BASE_JOB_SLURM.copy() - job_cw = BASE_JOB_CW.copy() - # edit slurm.job_id - job_slurm["job_id"] = str(job_id) - # edit slurm.name - job_slurm["name"] = f"job_name_{job_id}" - # edit slurm.username - job_slurm["username"] = 
user["cc_account_username"] - # edit cw.mila_email_username - job_cw["mila_email_username"] = user["mila_email_username"] - jobs.append({"slurm": job_slurm, "cw": job_cw, "user": {}}) + if student_to_nb_jobs: + user_map = {user["mila_email_username"]: user for user in USERS} + assert len(user_map) == len(USERS) + job_id = 0 + for student_name, nb_student_jobs in student_to_nb_jobs: + student_email = f"{student_name}@mila.quebec" + user = user_map[student_email] + for i in range(nb_student_jobs): + job_id += 1 + jobs.append( + { + "slurm": { + "account": "def-patate-rrg", + "cluster_name": "beluga", + "time_limit": 4320, + "submit_time": 1681680327, + "start_time": 0, + "end_time": 0, + "exit_code": "SUCCESS:0", + "array_job_id": "0", + "array_task_id": "None", + "job_id": str(job_id), + "name": f"job_name_{job_id}", + "nodes": "None assigned", + "partition": "other_fun_partition", + "job_state": "PENDING", + "tres_allocated": {}, + "tres_requested": { + "num_cpus": 80, + "mem": 95000, + "num_nodes": 1, + "billing": 80, + }, + "username": user["cc_account_username"], + "working_directory": "/a809/b333/c569", + }, + "cw": { + "mila_email_username": user["mila_email_username"], + "last_slurm_update": 1686248596.476063, + "last_slurm_update_by_sacct": 1686248596.476063, + }, + "user": {}, + } + ) + + print(f"Student {student_email}: {nb_student_jobs} jobs") + + assert job_id == len(jobs) + else: + for i in range(nb_jobs): + user = USERS[i % len(USERS)] + job_id = i + 1 + job_slurm = BASE_JOB_SLURM.copy() + job_cw = BASE_JOB_CW.copy() + # edit slurm.job_id + job_slurm["job_id"] = str(job_id) + # edit slurm.name + job_slurm["name"] = f"job_name_{job_id}" + # edit slurm.username + job_slurm["username"] = user["cc_account_username"] + # edit cw.mila_email_username + job_cw["mila_email_username"] = user["mila_email_username"] + jobs.append({"slurm": job_slurm, "cw": job_cw, "user": {}}) # populate job-user-dicts - for i in range(nb_dicts): - user_job_dict = { - "user_id": "student00@mila.quebec", + props_editor = "student01@mila.quebec" if nb_student_jobs else "student00@mila.quebec" + job_user_dicts = [ + { + "user_id": props_editor, "job_id": i + 1, "cluster_name": "beluga", "labels": { @@ -375,7 +434,8 @@ def _generate_huge_fake_data( for j in range(nb_props_per_dict) }, } - job_user_dicts.append(user_job_dict) + for i in range(nb_dicts) + ] print( f"Jobs: {len(jobs)}, dicts: {len(job_user_dicts)}, props per dict: {nb_props_per_dict}" @@ -430,7 +490,15 @@ def main(argv): parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter ) - parser.add_argument( + group = parser.add_mutually_exclusive_group() + group.add_argument( + "-j", + "--nb-student-jobs", + action="append", + type=str, + help="Number of job for a specific student, in format: =. Accept multiple declarations. Example: -j student00=100 -j student05=1900", + ) + group.add_argument( "--nb-jobs", type=int, default=DEFAULT_NB_JOBS, @@ -458,6 +526,7 @@ def main(argv): # Store the generated fake data in the database store_data_in_db( nb_jobs=args.nb_jobs, + nb_student_jobs=args.nb_student_jobs, nb_dicts=args.nb_dicts, nb_props_per_dict=args.nb_props_per_dict, ) From 679d1e5c5ef242580897806c169da86aebf7d9a7 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Thu, 29 Feb 2024 15:45:17 -0500 Subject: [PATCH 15/19] Rename collection and related texts from "labels" to "job user props". 
--- clockwork_web/browser_routes/jobs.py | 8 +-- clockwork_web/core/jobs_helper.py | 71 ++++++++++++------------ clockwork_web/core/search_helper.py | 12 ++-- clockwork_web/templates/base.html | 14 ++--- clockwork_web/templates/jobs_search.html | 18 +++--- clockwork_web/templates/settings.html | 4 +- scripts/store_huge_fake_data_in_db.py | 18 +++--- test_common/fake_data.json | 34 ++++++------ test_common/fake_data.py | 30 +++++----- test_common/jobs_test_helpers.py | 4 +- 10 files changed, 109 insertions(+), 104 deletions(-) diff --git a/clockwork_web/browser_routes/jobs.py b/clockwork_web/browser_routes/jobs.py index b3dffbfb..72bbfc18 100644 --- a/clockwork_web/browser_routes/jobs.py +++ b/clockwork_web/browser_routes/jobs.py @@ -101,8 +101,8 @@ def route_search(): - "sort_asc" is an optional integer and used to specify if sorting is ascending (1) or descending (-1). Default is 1. - "job_array" is optional and used to specify the job array in which we are looking for jobs - - "job_label_name" is optional and used to specify the label name associated to jobs we are looking for - - "job_label_content" is optional and used to specify the label value associated to jobs we are looking for + - "user_prop_name" is optional and used to specify the user prop name associated to jobs we are looking for + - "user_prop_content" is optional and used to specify the user prop value associated to jobs we are looking for .. :quickref: list all Slurm job as formatted html """ @@ -166,8 +166,8 @@ def route_search(): "sort_by": query.sort_by, "sort_asc": query.sort_asc, "job_array": query.job_array, - "job_label_name": query.job_label_name, - "job_label_content": query.job_label_content, + "user_prop_name": query.user_prop_name, + "user_prop_content": query.user_prop_content, }, ) diff --git a/clockwork_web/core/jobs_helper.py b/clockwork_web/core/jobs_helper.py index 30f82354..d70d699b 100644 --- a/clockwork_web/core/jobs_helper.py +++ b/clockwork_web/core/jobs_helper.py @@ -158,32 +158,35 @@ def get_filtered_and_paginated_jobs( # on the server because not enough memory was allocated to perform the sorting. LD_jobs = list(mc["jobs"].find(mongodb_filter)) - # Get job labels + # Get job user props if LD_jobs: - label_map = {} - # Collect all labels related to found jobs, - # and store them in a dict with keys (user ID, job ID, cluster_name) - for label in list( - mc["labels"].find( + user_props_map = {} + # Collect all job user props related to found jobs, + # and store them in a dict with keys (mila email username, job ID, cluster_name) + for user_props in list( + mc["job_user_props"].find( combine_all_mongodb_filters( { "job_id": { "$in": [int(job["slurm"]["job_id"]) for job in LD_jobs] }, - "user_id": current_user.mila_email_username, + "mila_email_username": current_user.mila_email_username, } ) ) ): - # Remove MongoDB identifier, as we won't use it. - label.pop("_id") - key = (label["user_id"], label["job_id"], label["cluster_name"]) - assert key not in label_map - label_map[key] = label["labels"] - - if label_map: - # Populate jobs with labels using job's user email, job ID and cluster name - # to find related labels in labels dict. + key = ( + user_props["mila_email_username"], + user_props["job_id"], + user_props["cluster_name"], + ) + assert key not in user_props_map + user_props_map[key] = user_props["props"] + + if user_props_map: + # Populate jobs with user props using + # current user email, job ID and job cluster name + # to find related user props in props map. 
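+            # (keys look like ("student00@mila.quebec", 795002, "mila"))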
for job in LD_jobs: key = ( # job["cw"]["mila_email_username"], @@ -191,8 +194,8 @@ def get_filtered_and_paginated_jobs( int(job["slurm"]["job_id"]), job["slurm"]["cluster_name"], ) - if key in label_map: - job["job_labels"] = label_map[key] + if key in user_props_map: + job["job_user_props"] = user_props_map[key] # Set nbr_total_jobs if want_count: @@ -272,8 +275,8 @@ def get_jobs( sort_by="submit_time", sort_asc=-1, job_array=None, - job_label_name=None, - job_label_content=None, + user_prop_name=None, + user_prop_content=None, ): """ Set up the filters according to the parameters and retrieve the requested jobs from the database. @@ -291,8 +294,8 @@ def get_jobs( sort_asc Whether or not to sort in ascending order (1) or descending order (-1). job_array ID of job array in which we look for jobs. - job_label_name name of label (string) we must find in jobs to look for. - job_label_content content of label (string) we must find in jobs to look for. + user_prop_name name of user prop (string) we must find in jobs to look for. + user_prop_content content of user prop (string) we must find in jobs to look for. Returns: A tuple containing: @@ -300,24 +303,24 @@ def get_jobs( - the total number of jobs corresponding of the filters in the databse, if want_count has been set to True, None otherwise, as second element """ - # If job label is specified, - # get job indices from jobs associated to this label. - if job_label_name is not None and job_label_content is not None: + # If job user prop is specified, + # get job indices from jobs associated to this prop. + if user_prop_name is not None and user_prop_content is not None: mc = get_db() - label_job_ids = [ - str(label["job_id"]) - for label in mc["labels"].find( + props_job_ids = [ + str(user_props["job_id"]) + for user_props in mc["job_user_props"].find( combine_all_mongodb_filters( - {f"labels.{job_label_name}": job_label_content} + {f"props.{user_prop_name}": user_prop_content} ) ) ] if job_ids: - # If job ids where provided, make intersection between given job ids and labelled job ids. - job_ids = list(set(label_job_ids) & set(job_ids)) + # If job ids where provided, make intersection between given job ids and props job ids. + job_ids = list(set(props_job_ids) & set(job_ids)) else: - # Otherwise, just use labelled job ids. - job_ids = label_job_ids + # Otherwise, just use props job ids. 
+ job_ids = props_job_ids # Set up and combine filters filter = get_global_filter( @@ -464,7 +467,7 @@ def get_jobs_properties_list_per_page(): "user", "job_id", "job_array", - "job_labels", + "job_user_props", "job_name", "job_state", "start_time", diff --git a/clockwork_web/core/search_helper.py b/clockwork_web/core/search_helper.py index 54e6a75c..2650c201 100644 --- a/clockwork_web/core/search_helper.py +++ b/clockwork_web/core/search_helper.py @@ -21,8 +21,8 @@ def parse_search_request(user, args, force_pagination=True): want_count = to_boolean(want_count) job_array = args.get("job_array", type=int, default=None) - job_label_name = args.get("job_label_name", type=str, default=None) or None - job_label_content = args.get("job_label_content", type=str, default=None) or None + user_prop_name = args.get("user_prop_name", type=str, default=None) or None + user_prop_content = args.get("user_prop_content", type=str, default=None) or None default_page_number = "1" if force_pagination else None @@ -73,8 +73,8 @@ def parse_search_request(user, args, force_pagination=True): sort_asc=sort_asc, want_count=want_count, job_array=job_array, - job_label_name=job_label_name, - job_label_content=job_label_content, + user_prop_name=user_prop_name, + user_prop_content=user_prop_content, ) ######################### @@ -119,7 +119,7 @@ def search_request(user, args, force_pagination=True): sort_by=query.sort_by, sort_asc=query.sort_asc, job_array=query.job_array, - job_label_name=query.job_label_name, - job_label_content=query.job_label_content, + user_prop_name=query.user_prop_name, + user_prop_content=query.user_prop_content, ) return (query, jobs, nbr_total_jobs) diff --git a/clockwork_web/templates/base.html b/clockwork_web/templates/base.html index 51d8c7d8..8730dcd2 100644 --- a/clockwork_web/templates/base.html +++ b/clockwork_web/templates/base.html @@ -324,11 +324,11 @@

{% endif %} - {% if previous_request_args['job_label_name'] is not none %} - + {% if previous_request_args['user_prop_name'] is not none %} + {% endif %} - {% if previous_request_args['job_label_content'] is not none %} - + {% if previous_request_args['user_prop_content'] is not none %} + {% endif %}
@@ -341,9 +341,9 @@

- Label {{ previous_request_args['job_label_name'] }}: "{{ previous_request_args['job_label_content'] }}"     + {% if previous_request_args['user_prop_name'] is not none and previous_request_args['user_prop_content'] is not none %} + + User prop {{ previous_request_args['user_prop_name'] }}: "{{ previous_request_args['user_prop_content'] }}"     {% endif %} diff --git a/clockwork_web/templates/jobs_search.html b/clockwork_web/templates/jobs_search.html index 2bd20321..f225a7c0 100644 --- a/clockwork_web/templates/jobs_search.html +++ b/clockwork_web/templates/jobs_search.html @@ -101,9 +101,9 @@

JOBS

{% if (web_settings | check_web_settings_column_display(page_name, "job_array")) %} Job array {% endif %} - - {% if (web_settings | check_web_settings_column_display(page_name, "job_labels")) %} - labels + + {% if (web_settings | check_web_settings_column_display(page_name, "job_user_props")) %} + Job-user props {% endif %} {% if (web_settings | check_web_settings_column_display(page_name, "job_name")) %} @@ -197,14 +197,14 @@

JOBS

{% endif %} - - {% if (web_settings | check_web_settings_column_display(page_name, "job_labels")) %} + + {% if (web_settings | check_web_settings_column_display(page_name, "job_user_props")) %} - {% for D_label_name, D_label_content in D_job.get('job_labels', {}).items() %} + {% for D_user_prop_name, D_user_prop_content in D_job.get('job_user_props', {}).items() %}

- - {{ D_label_name }}
- {{ D_label_content }} +
+ {{ D_user_prop_name }}
+ {{ D_user_prop_content }}

{% endfor %} diff --git a/clockwork_web/templates/settings.html b/clockwork_web/templates/settings.html index 8fefe2ed..a9b04f51 100644 --- a/clockwork_web/templates/settings.html +++ b/clockwork_web/templates/settings.html @@ -279,7 +279,7 @@

{{ gettext("User settings %(mila_email_username)s", mila_email_username=curr {{ gettext("User (@mila.quebec)") }} {{ gettext("Job ID") }} {{ gettext("Job array") }} - {{ gettext("Job labels") }} + {{ gettext("Job-user props") }} {{ gettext("Job name [:20]") }} {{ gettext("Job state") }} {{ gettext("Submit time") }} @@ -292,7 +292,7 @@

{{ gettext("User settings %(mila_email_username)s", mila_email_username=curr {% set page_name = "jobs_list" %} - {% for column_name in ["clusters", "user","job_id", "job_array", "job_labels", "job_name", "job_state", "submit_time", "start_time", "end_time", "links"] %} + {% for column_name in ["clusters", "user","job_id", "job_array", "job_user_props", "job_name", "job_state", "submit_time", "start_time", "end_time", "links"] %}
{% if (web_settings | check_web_settings_column_display(page_name, column_name)) %} diff --git a/scripts/store_huge_fake_data_in_db.py b/scripts/store_huge_fake_data_in_db.py index 28e8551e..73fd07e5 100644 --- a/scripts/store_huge_fake_data_in_db.py +++ b/scripts/store_huge_fake_data_in_db.py @@ -423,13 +423,15 @@ def _generate_huge_fake_data( jobs.append({"slurm": job_slurm, "cw": job_cw, "user": {}}) # populate job-user-dicts - props_editor = "student01@mila.quebec" if nb_student_jobs else "student00@mila.quebec" + props_editor = ( + "student01@mila.quebec" if nb_student_jobs else "student00@mila.quebec" + ) job_user_dicts = [ { - "user_id": props_editor, + "mila_email_username": props_editor, "job_id": i + 1, "cluster_name": "beluga", - "labels": { + "props": { f"prop_{j + 1}_for_job_{i + 1}": f"I am user dict prop {j + 1} for job ID {i + 1}" for j in range(nb_props_per_dict) }, @@ -440,7 +442,7 @@ def _generate_huge_fake_data( print( f"Jobs: {len(jobs)}, dicts: {len(job_user_dicts)}, props per dict: {nb_props_per_dict}" ) - return {"users": USERS, "jobs": jobs, "labels": job_user_dicts} + return {"users": USERS, "jobs": jobs, "job_user_props": job_user_dicts} def populate_fake_data(db_insertion_point, **kwargs): @@ -463,12 +465,12 @@ def populate_fake_data(db_insertion_point, **kwargs): [("mila_email_username", 1)], name="users_email_index" ) db_insertion_point["gpu"].create_index([("name", 1)], name="gpu_name") - db_insertion_point["labels"].create_index( - [("user_id", 1), ("job_id", 1), ("cluster_name", 1), ("labels", 1)], - name="job_label_index", + db_insertion_point["job_user_props"].create_index( + [("mila_email_username", 1), ("job_id", 1), ("cluster_name", 1), ("props", 1)], + name="job_user_props_index", ) - for k in ["users", "jobs", "nodes", "gpu", "labels"]: + for k in ["users", "jobs", "nodes", "gpu", "job_user_props"]: # Anyway clean before inserting db_insertion_point[k].delete_many({}) if k in E and E[k]: diff --git a/test_common/fake_data.json b/test_common/fake_data.json index 0ffcbee2..29c18e58 100644 --- a/test_common/fake_data.json +++ b/test_common/fake_data.json @@ -5963,46 +5963,46 @@ "tflops_fp32": 16.31 } ], - "labels": [ + "job_user_props": [ { - "user_id": "student06@mila.quebec", + "mila_email_username": "student00@mila.quebec", "job_id": 795002, "cluster_name": "mila", - "labels": { - "name": "je suis un label 1" + "props": { + "name": "je suis une user prop 1" } }, { - "user_id": "student16@mila.quebec", + "mila_email_username": "student00@mila.quebec", "job_id": 606872, "cluster_name": "mila", - "labels": { - "name": "je suis un label 2" + "props": { + "name": "je suis une user prop 2" } }, { - "user_id": "student15@mila.quebec", + "mila_email_username": "student00@mila.quebec", "job_id": 834395, "cluster_name": "graham", - "labels": { - "name": "je suis un label 3" + "props": { + "name": "je suis une user prop 3" } }, { - "user_id": "student15@mila.quebec", + "mila_email_username": "student00@mila.quebec", "job_id": 154325, "cluster_name": "graham", - "labels": { - "name": "je suis un label 3", - "name2": "je suis un label 4" + "props": { + "name": "je suis une user prop 3", + "name2": "je suis une user prop 4" } }, { - "user_id": "student12@mila.quebec", + "mila_email_username": "student00@mila.quebec", "job_id": 613024, "cluster_name": "graham", - "labels": { - "name": "je suis un label 1" + "props": { + "name": "je suis une user prop 1" } } ] diff --git a/test_common/fake_data.py b/test_common/fake_data.py index 0677e267..7efb55ea 100644 --- 
a/test_common/fake_data.py +++ b/test_common/fake_data.py @@ -21,18 +21,18 @@ def fake_data(): with open(json_file, "r") as f: E = json.load(f) - # Add labels to jobs + # Add user props to jobs for job in E["jobs"]: job_id = int(job["slurm"]["job_id"]) - user_id = job["cw"]["mila_email_username"] + mila_email_username = job["cw"]["mila_email_username"] cluster_name = job["slurm"]["cluster_name"] - for label in E["labels"]: + for user_props in E["job_user_props"]: if ( - label["job_id"] == job_id - and label["user_id"] == user_id - and label["cluster_name"] == cluster_name + user_props["job_id"] == job_id + and user_props["mila_email_username"] == mila_email_username + and user_props["cluster_name"] == cluster_name ): - job["job_labels"] = label["labels"] + job["job_user_props"] = user_props["props"] mutate_some_job_status(E) return E @@ -84,12 +84,12 @@ def populate_fake_data(db_insertion_point, json_file=None, mutate=False): [("mila_email_username", 1)], name="users_email_index" ) db_insertion_point["gpu"].create_index([("name", 1)], name="gpu_name") - db_insertion_point["labels"].create_index( - [("user_id", 1), ("job_id", 1), ("cluster_name", 1), ("labels", 1)], - name="job_label_index", + db_insertion_point["job_user_props"].create_index( + [("mila_email_username", 1), ("job_id", 1), ("cluster_name", 1), ("props", 1)], + name="job_user_props_index", ) - for k in ["users", "jobs", "nodes", "gpu", "labels"]: + for k in ["users", "jobs", "nodes", "gpu", "job_user_props"]: if k in E: for e in E[k]: db_insertion_point[k].insert_one(e) @@ -114,10 +114,10 @@ def cleanup_function(): for e in E["gpu"]: db_insertion_point["gpu"].delete_many({"name": e["name"]}) - for e in E["labels"]: - copy_e = e - copy_e.pop("labels") - db_insertion_point["labels"].delete_many(copy_e) + for e in E["job_user_props"]: + copy_e = e.copy() + copy_e.pop("props") + db_insertion_point["job_user_props"].delete_many(copy_e) for (k, sub, id_field) in [ ("jobs", "slurm", "job_id"), diff --git a/test_common/jobs_test_helpers.py b/test_common/jobs_test_helpers.py index b66ac7af..a1f81872 100644 --- a/test_common/jobs_test_helpers.py +++ b/test_common/jobs_test_helpers.py @@ -34,7 +34,7 @@ def helper_single_job_at_random(fake_data, cluster_name): def validator(D_job): for k1 in original_D_job: - assert k1 in ["slurm", "cw", "user", "job_labels"] + assert k1 in ["slurm", "cw", "user", "job_user_props"] assert D_job[k1] == original_D_job[k1], f"{D_job}\n{original_D_job}" return validator, job_id @@ -164,7 +164,7 @@ def validator(LD_jobs): # compare all the dicts one by one for (D_job, D_original_job) in zip(LD_jobs, LD_original_jobs): for k1 in D_original_job: - assert k1 in ["slurm", "cw", "user", "job_labels"] + assert k1 in ["slurm", "cw", "user", "job_user_props"] assert D_job[k1] == D_original_job[k1] return validator From 85c6017ff1e9c24377639d6d3e12a76092e9f8e3 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Mon, 4 Mar 2024 09:45:56 -0500 Subject: [PATCH 16/19] Update. 
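
Among other things, this revision adds a --disable-index switch so runs can
compare indexed and unindexed MongoDB queries. A quick way to confirm which
plan the server actually picks (a sketch, assuming a local dev instance and
the collection and index names defined below):

    from pymongo import MongoClient

    coll = MongoClient("localhost", 27017)["clockwork"]["job_user_props"]
    plan = coll.find({"mila_email_username": "student01@mila.quebec"}).explain()
    # An "IXSCAN" stage means job_user_props_index is used; "COLLSCAN" means
    # a full collection scan (the --disable-index case).
    print(plan["queryPlanner"]["winningPlan"])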
--- scripts/gen_benchmark_script_students.py | 117 +++++++++++++++-------- scripts/plot_benchmark_students.py | 37 +++++-- scripts/requirements.txt | 1 + scripts/store_huge_fake_data_in_db.py | 75 ++++++++++----- 4 files changed, 157 insertions(+), 73 deletions(-) diff --git a/scripts/gen_benchmark_script_students.py b/scripts/gen_benchmark_script_students.py index 26535e50..d8d05381 100644 --- a/scripts/gen_benchmark_script_students.py +++ b/scripts/gen_benchmark_script_students.py @@ -1,5 +1,7 @@ import sys import os +from datetime import datetime +import argparse SIZES_STUDENT00 = [0, 10_000, 100_000, 1_000_000, 2_000_000] SIZES_STUDENT01 = list(range(0, 101, 20)) @@ -8,59 +10,90 @@ NB_REQUESTS = 10 -def main(): - if len(sys.argv) != 2: - print("Missing output folder name", file=sys.stderr) - exit(1) +def main(argv): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--disable-index", + action="store_true", + help="If specified, will not create MongoDB index when storing fake data.", + ) + args = parser.parse_args(argv[1:]) + print("Generating benchmark script with args:", args, file=sys.stderr) + + bench_date = datetime.now() + bench_basename = "bench_students" + if args.disable_index: + bench_basename += "_noindex" + bench_name = f"{bench_basename}_{bench_date}".replace(" ", "_").replace(":", "-") + assert not os.path.exists(bench_name) + os.mkdir(bench_name) + + script_name = f"{bench_name}.sh" + with open(script_name, "w") as file: + print("set -eu", file=file) + print("export CLOCKWORK_API_KEY='000aaa01'", file=file) + print("export CLOCKWORK_EMAIL='student01@mila.quebec'", file=file) + print(file=file) - wd = sys.argv[1] - if not os.path.exists(wd): - os.mkdir(wd) + for std_00 in SIZES_STUDENT00: + for std_01 in SIZES_STUDENT01: + gen_commands(std_00, std_01, bench_name, args, file) - print("set -eu") - print("export CLOCKWORK_API_KEY='000aaa01'") - print("export CLOCKWORK_EMAIL='student01@mila.quebec'") - print() + print(file=file) + print(f"python3 scripts/plot_benchmark_students.py {bench_name}", file=file) + print(f"tar -cf {bench_name}.tar {bench_name}/", file=file) + print(f"echo Benchmark compressed in: {bench_name}.tar", file=file) - for std_00 in SIZES_STUDENT00: - for std_01 in SIZES_STUDENT01: - gen_commands(std_00, std_01, wd) + print("Benchmark script saved in:", script_name, file=sys.stderr) -def gen_commands(nb_jobs_student00, nb_jobs_student01, working_directory): - task_name = f"student00-{nb_jobs_student00:06}_student01-{nb_jobs_student01:06}" +def gen_commands(nb_jobs_student00, nb_jobs_student01, working_directory, args, file): nb_dicts = nb_jobs_student00 + nb_jobs_student01 - nb_props_per_dict = NB_PROPS_PER_DICT + task_name = ( + f"std00-{nb_jobs_student00:06}_" + f"std01-{nb_jobs_student01:06}_" + f"dicts-{nb_dicts}_" + f"props-{NB_PROPS_PER_DICT}_" + f"index-{0 if args.disable_index else 1}" + ) - cmd_fake_data = ( - f"python3 scripts/store_huge_fake_data_in_db.py " - f"-j student00={nb_jobs_student00} " - f"-j student01={nb_jobs_student01} " - f"--nb-dicts {nb_dicts} " - f"--nb-props-per-dict {nb_props_per_dict}" + print( + ( + f"python3 scripts/store_huge_fake_data_in_db.py " + f"-j student00={nb_jobs_student00} " + f"-j student01={nb_jobs_student01} " + f"--nb-dicts {nb_dicts} " + f"--nb-props-per-dict {NB_PROPS_PER_DICT} " + f"--props-username student01@mila.quebec " + f"{'--disable-index' if args.disable_index else ''}" + ), + file=file, ) - cmd_benchmark = ( - f"python3 
scripts/job_request_benchmark.py " - f"-w {working_directory} " - f'--address "0.0.0.0" ' - f"--port 5000 " - f'--username "student01@mila.quebec" ' - f"--nb-requests {NB_REQUESTS} " - f"--output {task_name}" + print('python3 -m flask run --host="0.0.0.0" &', file=file) + print("export SERVER_PID=$!", file=file) + print("sleep 1", file=file) + print( + '''python3 -c "import urllib.request; print(urllib.request.urlopen('http://127.0.0.1:5000/').getcode())"''', + file=file, ) - - print(cmd_fake_data) - print('python3 -m flask run --host="0.0.0.0" &') - print("export SERVER_PID=$!") - print("sleep 1") print( - '''python3 -c "import urllib.request; print(urllib.request.urlopen('http://127.0.0.1:5000/').getcode())"''' + ( + f"python3 scripts/job_request_benchmark.py " + f"-w {working_directory} " + f'--address "0.0.0.0" ' + f"--port 5000 " + f'--username "student01@mila.quebec" ' + f"--nb-requests {NB_REQUESTS} " + f"--output {task_name}" + ), + file=file, ) - print(cmd_benchmark) - print("kill $SERVER_PID") - print("export SERVER_PID=") - print() + print("kill $SERVER_PID", file=file) + print("export SERVER_PID=", file=file) + print(file=file) if __name__ == "__main__": - main() + main(sys.argv) diff --git a/scripts/plot_benchmark_students.py b/scripts/plot_benchmark_students.py index 5ae29b2b..c39086fe 100644 --- a/scripts/plot_benchmark_students.py +++ b/scripts/plot_benchmark_students.py @@ -25,18 +25,33 @@ def main(): folder = sys.argv[1] stats_file_names = [] for name in os.listdir(folder): - if name.startswith("student00-") and name.endswith(".json"): + if name.startswith("std00-") and name.endswith(".json"): stats_file_names.append(name) # Get stat data. stats = {} + infos_nb_props = set() + infos_index = set() for name in sorted(stats_file_names): title, extension = name.split(".") - info_student00, info_student01 = title.split("_") + ( + info_student00, + info_student01, + info_nb_dicts, + info_nb_props, + info_index, + ) = title.split("_") _, nb_jobs_student00 = info_student00.split("-") _, nb_jobs_student01 = info_student01.split("-") + _, nb_dicts = info_nb_dicts.split("-") + _, nb_props = info_nb_props.split("-") + _, nb_index = info_index.split("-") nb_jobs_student00 = int(nb_jobs_student00) nb_jobs_student01 = int(nb_jobs_student01) + nb_props = int(nb_props) + nb_index = int(nb_index) + infos_nb_props.add(nb_props) + infos_index.add(nb_index) with open(os.path.join(folder, name)) as file: local_stats = json.load(file) @@ -46,10 +61,17 @@ def main(): durations = sorted(stat["pc_nanoseconds"] for stat in local_stats) stats[(nb_jobs_student00, nb_jobs_student01)] = durations - _plots_request_time_per_nb_jobs(stats, folder) + assert len(infos_nb_props) == 1 + assert len(infos_index) == 1 + nb_props = next(iter(infos_nb_props)) + nb_index = next(iter(infos_index)) + output_name = f"nb-student01-jobs-to-time_props-{nb_props}_index-{nb_index}" + _plots_request_time_per_nb_jobs(stats, folder, output_name, nb_props, nb_index) -def _plots_request_time_per_nb_jobs(stats: dict, folder: str): +def _plots_request_time_per_nb_jobs( + stats: dict, folder: str, output_name: str, nb_props: int, has_index: int +): cdict = { "red": ( (0.0, 0.0, 0.0), @@ -93,11 +115,14 @@ def _plots_request_time_per_nb_jobs(stats: dict, folder: str): ) # _show_points(xs, ys) - ax.set_title("Request duration per number of jobs for student01") + ax.set_title( + f"Request duration per number of jobs for student01 ({nb_props} props per dict)" + + (" (no MongoDB index)" if not has_index else "") + ) 
ax.set_xlabel("Number of student01's jobs in DB") ax.set_ylabel("Request duration in seconds") ax.legend() - plot_path = os.path.join(folder, f"nb_student01_jobs_to_time.jpg") + plot_path = os.path.join(folder, f"{output_name}.jpg") plt.gcf().set_size_inches(20, 10) plt.savefig(plot_path, bbox_inches="tight") plt.close(fig) diff --git a/scripts/requirements.txt b/scripts/requirements.txt index 77c7ad90..6af79857 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -9,3 +9,4 @@ MarkupSafe==2.1.3 pyasn1==0.5.0 pymongo==4.5.0 Werkzeug==3.0.1 +matplotlib==3.8.3 diff --git a/scripts/store_huge_fake_data_in_db.py b/scripts/store_huge_fake_data_in_db.py index 73fd07e5..e3791d61 100644 --- a/scripts/store_huge_fake_data_in_db.py +++ b/scripts/store_huge_fake_data_in_db.py @@ -35,7 +35,6 @@ import argparse import sys -from datetime import datetime from clockwork_web.config import register_config from slurm_state.mongo_client import get_mongo_client @@ -345,6 +344,7 @@ def _generate_huge_fake_data( nb_student_jobs=None, nb_dicts=DEFAULT_NB_DICTS, nb_props_per_dict=DEFAULT_NB_PROPS_PER_DICT, + props_username="student00@mila.quebec", ): student_to_nb_jobs = [] if nb_student_jobs is not None: @@ -423,12 +423,9 @@ def _generate_huge_fake_data( jobs.append({"slurm": job_slurm, "cw": job_cw, "user": {}}) # populate job-user-dicts - props_editor = ( - "student01@mila.quebec" if nb_student_jobs else "student00@mila.quebec" - ) job_user_dicts = [ { - "mila_email_username": props_editor, + "mila_email_username": props_username, "job_id": i + 1, "cluster_name": "beluga", "props": { @@ -446,29 +443,41 @@ def _generate_huge_fake_data( def populate_fake_data(db_insertion_point, **kwargs): + disable_index = kwargs.pop("disable_index", False) + print("Generating huge fake data") E = _generate_huge_fake_data(**kwargs) print("Generated huge fake data") - # Create indices. This isn't half as important as when we're - # dealing with large quantities of data, but it's part of the - # set up for the database. - db_insertion_point["jobs"].create_index( - [("slurm.job_id", 1), ("slurm.cluster_name", 1)], - name="job_id_and_cluster_name", - ) - db_insertion_point["nodes"].create_index( - [("slurm.name", 1), ("slurm.cluster_name", 1)], - name="name_and_cluster_name", - ) - db_insertion_point["users"].create_index( - [("mila_email_username", 1)], name="users_email_index" - ) - db_insertion_point["gpu"].create_index([("name", 1)], name="gpu_name") - db_insertion_point["job_user_props"].create_index( - [("mila_email_username", 1), ("job_id", 1), ("cluster_name", 1), ("props", 1)], - name="job_user_props_index", - ) + if not disable_index: + print("Generate MongoDB index.") + # Create indices. This isn't half as important as when we're + # dealing with large quantities of data, but it's part of the + # set up for the database. 
+        db_insertion_point["jobs"].create_index(
+            [
+                ("slurm.job_id", 1),
+                ("slurm.cluster_name", 1),
+                ("cw.mila_email_username", 1),
+            ],
+            name="job_id_and_cluster_name",
+        )
+        db_insertion_point["nodes"].create_index(
+            [("slurm.name", 1), ("slurm.cluster_name", 1)],
+            name="name_and_cluster_name",
+        )
+        db_insertion_point["users"].create_index(
+            [("mila_email_username", 1)], name="users_email_index"
+        )
+        db_insertion_point["gpu"].create_index([("name", 1)], name="gpu_name")
+        db_insertion_point["job_user_props"].create_index(
+            [
+                ("mila_email_username", 1),
+                ("job_id", 1),
+                ("cluster_name", 1),
+            ],
+            name="job_user_props_index",
+        )
 
     for k in ["users", "jobs", "nodes", "gpu", "job_user_props"]:
         # Anyway clean before inserting
         db_insertion_point[k].delete_many({})
@@ -498,7 +507,10 @@ def main(argv):
         "--nb-student-jobs",
         action="append",
         type=str,
-        help="Number of jobs for a specific student, in format: <student_name>=<nb_jobs>. Accept multiple declarations. Example: -j student00=100 -j student05=1900",
+        help=(
+            "Number of jobs for a specific student, in format: <student_name>=<nb_jobs>. "
+            "Accept multiple declarations. Example: -j student00=100 -j student05=1900"
+        ),
     )
     group.add_argument(
         "--nb-jobs",
@@ -518,6 +530,17 @@ def main(argv):
         default=DEFAULT_NB_PROPS_PER_DICT,
         help=f"Number of key-value pairs in each job-user dict.",
     )
+    parser.add_argument(
+        "--props-username",
+        type=str,
+        default="student00@mila.quebec",
+        help="Email of the user who creates job-user dicts.",
+    )
+    parser.add_argument(
+        "--disable-index",
+        action="store_true",
+        help="If specified, will not create MongoDB indexes.",
+    )
     args = parser.parse_args(argv[1:])
     print(args)
@@ -531,6 +554,8 @@ def main(argv):
         nb_student_jobs=args.nb_student_jobs,
         nb_dicts=args.nb_dicts,
         nb_props_per_dict=args.nb_props_per_dict,
+        props_username=args.props_username,
+        disable_index=args.disable_index,
     )

From bad73f5baa0fc0ab78621c7f9bcd24a2778dda84 Mon Sep 17 00:00:00 2001
From: notoraptor
Date: Mon, 4 Mar 2024 14:03:08 -0500
Subject: [PATCH 17/19] Make sure to clean collections and indexes before
 inserting new fake data.

---
 scripts/store_huge_fake_data_in_db.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/scripts/store_huge_fake_data_in_db.py b/scripts/store_huge_fake_data_in_db.py
index e3791d61..00f36051 100644
--- a/scripts/store_huge_fake_data_in_db.py
+++ b/scripts/store_huge_fake_data_in_db.py
@@ -449,6 +449,11 @@ def populate_fake_data(db_insertion_point, **kwargs):
     E = _generate_huge_fake_data(**kwargs)
     print("Generated huge fake data")
 
+    # Drop any collection (and related indexes) before inserting.
+    for k in ["users", "jobs", "nodes", "gpu", "job_user_props"]:
+        db_insertion_point[k].drop()
+        assert not list(db_insertion_point[k].list_indexes())
+
     if not disable_index:
         print("Generate MongoDB index.")
         # Create indices. This isn't half as important as when we're
@@ -479,6 +484,9 @@ def populate_fake_data(db_insertion_point, **kwargs):
             name="job_user_props_index",
         )
 
+        for k in ["users", "jobs", "nodes", "gpu", "job_user_props"]:
+            assert list(db_insertion_point[k].list_indexes())
+
     for k in ["users", "jobs", "nodes", "gpu", "job_user_props"]:
         # Anyway clean before inserting
         db_insertion_point[k].delete_many({})

From 6bad887d628cf6cea4dd3b95f581df212f543800 Mon Sep 17 00:00:00 2001
From: notoraptor
Date: Tue, 5 Mar 2024 11:53:53 -0500
Subject: [PATCH 18/19] Check if current_user is available before getting job
 user props.
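
The helper can be called in contexts where no web user is authenticated (e.g.
from scripts or tests), in which case current_user is falsy and there is no
requester whose props should be fetched. A minimal sketch of the guarded
lookup, reusing the names from this diff and assuming current_user exposes
mila_email_username when logged in:

    def get_user_props_map(mc, LD_jobs, current_user):
        # Return {} for anonymous callers or when no jobs matched.
        if not (LD_jobs and current_user):
            return {}
        # Keys: (mila_email_username, job_id, cluster_name), as in the
        # original comment; values: the stored props dict.
        return {
            (p["mila_email_username"], p["job_id"], p["cluster_name"]): p["props"]
            for p in mc["job_user_props"].find(
                {"mila_email_username": current_user.mila_email_username}
            )
        }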
---
 clockwork_web/core/jobs_helper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clockwork_web/core/jobs_helper.py b/clockwork_web/core/jobs_helper.py
index d70d699b..26831374 100644
--- a/clockwork_web/core/jobs_helper.py
+++ b/clockwork_web/core/jobs_helper.py
@@ -159,7 +159,7 @@ def get_filtered_and_paginated_jobs(
     LD_jobs = list(mc["jobs"].find(mongodb_filter))
 
     # Get job user props
-    if LD_jobs:
+    if LD_jobs and current_user:
         user_props_map = {}
         # Collect all job user props related to found jobs,
         # and store them in a dict with keys (mila email username, job ID, cluster_name)

From a7b2f4f108afd5671b467bb0717e8f97543db96f Mon Sep 17 00:00:00 2001
From: notoraptor
Date: Mon, 18 Mar 2024 09:12:03 -0400
Subject: [PATCH 19/19] server_benchmark_locust: add commented-out code that
 allows using EMAIL as the username for requests, instead of randomly picking
 among server usernames.

---
 scripts/server_benchmark_locust.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/server_benchmark_locust.py b/scripts/server_benchmark_locust.py
index 00eeaf18..501fb245 100644
--- a/scripts/server_benchmark_locust.py
+++ b/scripts/server_benchmark_locust.py
@@ -107,6 +107,7 @@ def __init__(self, *args, **kwargs):
         global NEXT_USER_ID
         super().__init__(*args, **kwargs)
         self.username = USERNAMES[NEXT_USER_ID % len(USERNAMES)]
+        # self.username = EMAIL
         # Move to next username for next user
         NEXT_USER_ID += 1
         print("Username:", NEXT_USER_ID, self.username)
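
A note on the commented-out line above: pinning every simulated user to one
account turns the benchmark from a multi-user into a single-user load pattern.
A standalone sketch of the two selection strategies; reading EMAIL from the
CLOCKWORK_EMAIL environment variable is an assumption for illustration, not
necessarily how the script defines it:

    import os

    USERNAMES = ["student00@mila.quebec", "student01@mila.quebec"]
    # Assumed source for EMAIL; the real script may define it differently.
    EMAIL = os.environ.get("CLOCKWORK_EMAIL", "student01@mila.quebec")

    def pick_username(next_user_id, use_email=False):
        # Mirrors the patched __init__: cycle through usernames, or pin to EMAIL.
        return EMAIL if use_email else USERNAMES[next_user_id % len(USERNAMES)]

    print(pick_username(0))                  # round-robin over USERNAMES
    print(pick_username(0, use_email=True))  # fixed account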