From 7d9a3804a397fc6a35ea7cb33e5f957a4400bdd2 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Fri, 6 Oct 2023 10:43:05 -0400 Subject: [PATCH 01/19] Add labels to fake data. Allow to show/hide labels column in settings. --- clockwork_web/core/jobs_helper.py | 1 + clockwork_web/templates/settings.html | 3 ++- test_common/fake_data.json | 34 ++++++++++++++++++++++++++- test_common/fake_data.py | 3 ++- 4 files changed, 38 insertions(+), 3 deletions(-) diff --git a/clockwork_web/core/jobs_helper.py b/clockwork_web/core/jobs_helper.py index a9ef4c72..c896ba0c 100644 --- a/clockwork_web/core/jobs_helper.py +++ b/clockwork_web/core/jobs_helper.py @@ -405,6 +405,7 @@ def get_jobs_properties_list_per_page(): "user", "job_id", "job_array", + "job_labels", "job_name", "job_state", "start_time", diff --git a/clockwork_web/templates/settings.html b/clockwork_web/templates/settings.html index a2d28e3c..8fefe2ed 100644 --- a/clockwork_web/templates/settings.html +++ b/clockwork_web/templates/settings.html @@ -279,6 +279,7 @@

{{ gettext("User settings %(mila_email_username)s", mila_email_username=curr {{ gettext("User (@mila.quebec)") }} {{ gettext("Job ID") }} {{ gettext("Job array") }} + {{ gettext("Job labels") }} {{ gettext("Job name [:20]") }} {{ gettext("Job state") }} {{ gettext("Submit time") }} @@ -291,7 +292,7 @@

{{ gettext("User settings %(mila_email_username)s", mila_email_username=curr {% set page_name = "jobs_list" %} - {% for column_name in ["clusters", "user","job_id", "job_array", "job_name", "job_state", "submit_time", "start_time", "end_time", "links"] %} + {% for column_name in ["clusters", "user","job_id", "job_array", "job_labels", "job_name", "job_state", "submit_time", "start_time", "end_time", "links"] %}
{% if (web_settings | check_web_settings_column_display(page_name, column_name)) %} diff --git a/test_common/fake_data.json b/test_common/fake_data.json index b38bb1a2..c9f97488 100644 --- a/test_common/fake_data.json +++ b/test_common/fake_data.json @@ -5962,5 +5962,37 @@ "tensor_cores": 576, "tflops_fp32": 16.31 } + ], + "labels": [ + { + "name": "je suis un label 1", + "user_id": "student06@mila.quebec", + "job_id": 795002 + }, + { + "name": "je suis un label 2", + "user_id": " student16@mila.quebec", + "job_id": 606872 + }, + { + "name": "je suis un label 3", + "user_id": "student15@mila.quebec", + "job_id": 834395 + }, + { + "name": "je suis un label 4", + "user_id": " student15@mila.quebec", + "job_id": 154325 + }, + { + "name": "je suis un label 5", + "user_id": " student15@mila.quebec", + "job_id": 154325 + }, + { + "name": "je suis un label 1", + "user_id": "student12@mila.quebec", + "job_id": 613024 + } ] -} \ No newline at end of file +} diff --git a/test_common/fake_data.py b/test_common/fake_data.py index 5061ffc0..757da134 100644 --- a/test_common/fake_data.py +++ b/test_common/fake_data.py @@ -70,8 +70,9 @@ def populate_fake_data(db_insertion_point, json_file=None, mutate=False): [("mila_email_username", 1)], name="users_email_index" ) db_insertion_point["gpu"].create_index([("name", 1)], name="gpu_name") + db_insertion_point["labels"].create_index([("user_id", 1), ("job_id", 1)], name="user_id_and_job_id") - for k in ["users", "jobs", "nodes", "gpu"]: + for k in ["users", "jobs", "nodes", "gpu", "labels"]: if k in E: for e in E[k]: db_insertion_point[k].insert_one(e) From 50d8f450b632ce07e914d7f6bdba2db80c776be6 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Fri, 6 Oct 2023 12:29:17 -0400 Subject: [PATCH 02/19] (WIP)(not working) try to join job and label collections to get labels along with jobs. --- clockwork_web/core/jobs_helper.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/clockwork_web/core/jobs_helper.py b/clockwork_web/core/jobs_helper.py index c896ba0c..5f44ee3a 100644 --- a/clockwork_web/core/jobs_helper.py +++ b/clockwork_web/core/jobs_helper.py @@ -115,6 +115,32 @@ def get_filtered_and_paginated_jobs( if not (type(nbr_items_to_display) == int and nbr_items_to_display > 0): nbr_items_to_display = None + aggregation = [{ + "$match": { + "$expr": mongodb_filter + } + }, { + "$lookup": { + "from": "labels", + "localField": "slurm.job_id", + "foreignField": "job_id", + "let": {"labelJobField": "$job_id", "labelUserField": "$user_id"}, + "pipeline": [ + { + "$match": { + "$expr": { + "$and": [ + {"$eq": ["$slurm.job_id", "$$labelJobField"]}, + {"$eq": ["$cw.mila_email_username", "$$labelUserField"]}, + ] + } + } + } + ], + "as": "job_label", + } + }] + # Retrieve the database mc = get_db() # Get the jobs from it @@ -141,7 +167,7 @@ def get_filtered_and_paginated_jobs( sorting.append(["slurm.job_id", 1]) LD_jobs = list( mc["jobs"] - .find(mongodb_filter) + .aggregate(aggregation) .sort(sorting) .skip(nbr_skipped_items) .limit(nbr_items_to_display) @@ -155,7 +181,7 @@ def get_filtered_and_paginated_jobs( # Moreover, in situations where a lot of data was present, # e.g. 1-2 months of historical data, this has caused errors # on the server because not enough memory was allocated to perform the sorting. 
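         # (Sorting in Python instead trades some web-process CPU time for a
         # bounded memory footprint on the MongoDB side.)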
- LD_jobs = list(mc["jobs"].find(mongodb_filter)) + LD_jobs = list(mc["jobs"].aggregate(aggregation)) # Set nbr_total_jobs if want_count: From c93cd52fde482892571898169240550ecce9eef2 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Fri, 6 Oct 2023 13:21:52 -0400 Subject: [PATCH 03/19] Finally match jobs to labels with two separate mongodb calls. Allow to group by job labels on interface. --- clockwork_web/browser_routes/jobs.py | 2 + clockwork_web/core/jobs_helper.py | 65 ++++++++++++++---------- clockwork_web/core/search_helper.py | 3 ++ clockwork_web/templates/base.html | 10 ++++ clockwork_web/templates/jobs_search.html | 17 +++++++ test_common/fake_data.json | 10 ++-- test_common/fake_data.py | 4 +- 7 files changed, 77 insertions(+), 34 deletions(-) diff --git a/clockwork_web/browser_routes/jobs.py b/clockwork_web/browser_routes/jobs.py index 289c751a..b5156f6f 100644 --- a/clockwork_web/browser_routes/jobs.py +++ b/clockwork_web/browser_routes/jobs.py @@ -101,6 +101,7 @@ def route_search(): - "sort_asc" is an optional integer and used to specify if sorting is ascending (1) or descending (-1). Default is 1. - "job_array" is optional and used to specify the job array in which we are looking for jobs + - "job_label" is optional and used to specify the label associated to jobs we are looking for .. :quickref: list all Slurm job as formatted html """ @@ -164,6 +165,7 @@ def route_search(): "sort_by": query.sort_by, "sort_asc": query.sort_asc, "job_array": query.job_array, + "job_label": query.job_label, }, ) diff --git a/clockwork_web/core/jobs_helper.py b/clockwork_web/core/jobs_helper.py index 5f44ee3a..56825fde 100644 --- a/clockwork_web/core/jobs_helper.py +++ b/clockwork_web/core/jobs_helper.py @@ -115,32 +115,6 @@ def get_filtered_and_paginated_jobs( if not (type(nbr_items_to_display) == int and nbr_items_to_display > 0): nbr_items_to_display = None - aggregation = [{ - "$match": { - "$expr": mongodb_filter - } - }, { - "$lookup": { - "from": "labels", - "localField": "slurm.job_id", - "foreignField": "job_id", - "let": {"labelJobField": "$job_id", "labelUserField": "$user_id"}, - "pipeline": [ - { - "$match": { - "$expr": { - "$and": [ - {"$eq": ["$slurm.job_id", "$$labelJobField"]}, - {"$eq": ["$cw.mila_email_username", "$$labelUserField"]}, - ] - } - } - } - ], - "as": "job_label", - } - }] - # Retrieve the database mc = get_db() # Get the jobs from it @@ -167,7 +141,7 @@ def get_filtered_and_paginated_jobs( sorting.append(["slurm.job_id", 1]) LD_jobs = list( mc["jobs"] - .aggregate(aggregation) + .find(mongodb_filter) .sort(sorting) .skip(nbr_skipped_items) .limit(nbr_items_to_display) @@ -181,7 +155,24 @@ def get_filtered_and_paginated_jobs( # Moreover, in situations where a lot of data was present, # e.g. 1-2 months of historical data, this has caused errors # on the server because not enough memory was allocated to perform the sorting. - LD_jobs = list(mc["jobs"].aggregate(aggregation)) + LD_jobs = list(mc["jobs"].find(mongodb_filter)) + + # Get job labels + if LD_jobs: + label_map = {} + # Collect all labels related to found jobs, + # and store them in a dict with keys (user ID, job ID) + for label in mc["labels"].find( + combine_all_mongodb_filters( + {"job_id": {"$in": [int(job["slurm"]["job_id"]) for job in LD_jobs]}} + ) + ): + label_map.setdefault((label["user_id"], label["job_id"]), []).append(label) + # Populate jobs with labels using job's user email and job ID to find related labels in labels dict. 
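+            # (The labels come from a single batched find() on the "labels"
+            # collection: one extra MongoDB call per search rather than one
+            # query per job.)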
+            for job in LD_jobs:
+                job["job_labels"] = label_map.get(
+                    (job["cw"]["mila_email_username"], int(job["slurm"]["job_id"])), []
+                )
 
     # Set nbr_total_jobs
     if want_count:
@@ -261,6 +252,7 @@ def get_jobs(
     sort_by="submit_time",
     sort_asc=-1,
     job_array=None,
+    job_label=None,
 ):
     """
     Set up the filters according to the parameters and retrieve the requested jobs from the database.
@@ -278,6 +270,7 @@
         sort_asc        Whether or not to sort in ascending order (1) or descending order (-1).
         job_array       ID of job array in which we look for jobs.
+        job_label       label (string) we must find in jobs to look for.
 
     Returns:
         A tuple containing:
@@ -285,6 +278,22 @@
         - the total number of jobs corresponding to the filters in the database, if want_count has been set to True, None otherwise, as second element
     """
+    # If a job label is specified,
+    # get job indices from jobs associated with this label.
+    if job_label is not None:
+        mc = get_db()
+        label_job_ids = [
+            str(label["job_id"])
+            for label in mc["labels"].find(
+                combine_all_mongodb_filters({"name": job_label})
+            )
+        ]
+        if job_ids:
+            # If job ids were provided, take the intersection of the given job ids and the labelled job ids.
+            job_ids = list(set(label_job_ids) & set(job_ids))
+        else:
+            # Otherwise, just use the labelled job ids.
+            job_ids = label_job_ids
 
     # Set up and combine filters
     filter = get_global_filter(
diff --git a/clockwork_web/core/search_helper.py b/clockwork_web/core/search_helper.py
index 8d80b33e..f0e55723 100644
--- a/clockwork_web/core/search_helper.py
+++ b/clockwork_web/core/search_helper.py
@@ -21,6 +21,7 @@ def parse_search_request(user, args, force_pagination=True):
         want_count = to_boolean(want_count)
 
     job_array = args.get("job_array", type=int, default=None)
+    job_label = args.get("job_label", type=str, default=None)
 
     default_page_number = "1" if force_pagination else None
 
@@ -71,6 +72,7 @@
         sort_asc=sort_asc,
         want_count=want_count,
         job_array=job_array,
+        job_label=job_label,
     )
 
 
 #########################
@@ -115,5 +117,6 @@
         sort_by=query.sort_by,
         sort_asc=query.sort_asc,
         job_array=query.job_array,
+        job_label=query.job_label,
     )
     return (query, jobs, nbr_total_jobs)
diff --git a/clockwork_web/templates/base.html b/clockwork_web/templates/base.html
index bf79e7ea..adee968c 100644
--- a/clockwork_web/templates/base.html
+++ b/clockwork_web/templates/base.html
@@ -323,6 +323,9 @@

{% if previous_request_args['job_array'] is not none %} + {% endif %} + {% if previous_request_args['job_label'] is not none %} + {% endif %}
@@ -334,6 +337,13 @@

{% endif %} + + {% if previous_request_args['job_label'] is not none %} + + Label "{{ previous_request_args['job_label'] }}"     + + + {% endif %}

diff --git a/clockwork_web/templates/jobs_search.html b/clockwork_web/templates/jobs_search.html index 53077a6e..45be261e 100644 --- a/clockwork_web/templates/jobs_search.html +++ b/clockwork_web/templates/jobs_search.html @@ -101,6 +101,10 @@

JOBS

{% if (web_settings | check_web_settings_column_display(page_name, "job_array")) %} Job array {% endif %} + + {% if (web_settings | check_web_settings_column_display(page_name, "job_labels")) %} + labels + {% endif %} {% if (web_settings | check_web_settings_column_display(page_name, "job_name")) %} {% set sort_by = "name" %} @@ -193,6 +197,19 @@

JOBS

{% endif %} + + {% if (web_settings | check_web_settings_column_display(page_name, "job_labels")) %} + + {% for D_label in D_job['job_labels'] %} +

+ + {{ D_label['name'] }} + +

+ {% endfor %} + + {% endif %} + {% if (web_settings | check_web_settings_column_display(page_name, "job_name")) %} {{D_job['slurm'].get("name", "")[0:20]}} diff --git a/test_common/fake_data.json b/test_common/fake_data.json index c9f97488..30667bf9 100644 --- a/test_common/fake_data.json +++ b/test_common/fake_data.json @@ -5971,7 +5971,7 @@ }, { "name": "je suis un label 2", - "user_id": " student16@mila.quebec", + "user_id": "student16@mila.quebec", "job_id": 606872 }, { @@ -5980,13 +5980,13 @@ "job_id": 834395 }, { - "name": "je suis un label 4", - "user_id": " student15@mila.quebec", + "name": "je suis un label 3", + "user_id": "student15@mila.quebec", "job_id": 154325 }, { - "name": "je suis un label 5", - "user_id": " student15@mila.quebec", + "name": "je suis un label 4", + "user_id": "student15@mila.quebec", "job_id": 154325 }, { diff --git a/test_common/fake_data.py b/test_common/fake_data.py index 757da134..1017c554 100644 --- a/test_common/fake_data.py +++ b/test_common/fake_data.py @@ -70,7 +70,9 @@ def populate_fake_data(db_insertion_point, json_file=None, mutate=False): [("mila_email_username", 1)], name="users_email_index" ) db_insertion_point["gpu"].create_index([("name", 1)], name="gpu_name") - db_insertion_point["labels"].create_index([("user_id", 1), ("job_id", 1)], name="user_id_and_job_id") + db_insertion_point["labels"].create_index( + [("user_id", 1), ("job_id", 1), ("name", 1)], name="job_label_index" + ) for k in ["users", "jobs", "nodes", "gpu", "labels"]: if k in E: From fcc3f6b4bf0804bdf95ef8ef37c61cb7407871d6 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Fri, 6 Oct 2023 14:27:54 -0400 Subject: [PATCH 04/19] Fix unit tests. --- clockwork_web/core/jobs_helper.py | 20 ++++++++++++++------ clockwork_web/templates/jobs_search.html | 2 +- test_common/fake_data.py | 12 ++++++++++++ test_common/jobs_test_helpers.py | 12 ++++-------- 4 files changed, 31 insertions(+), 15 deletions(-) diff --git a/clockwork_web/core/jobs_helper.py b/clockwork_web/core/jobs_helper.py index 56825fde..96b6819b 100644 --- a/clockwork_web/core/jobs_helper.py +++ b/clockwork_web/core/jobs_helper.py @@ -162,17 +162,25 @@ def get_filtered_and_paginated_jobs( label_map = {} # Collect all labels related to found jobs, # and store them in a dict with keys (user ID, job ID) - for label in mc["labels"].find( - combine_all_mongodb_filters( - {"job_id": {"$in": [int(job["slurm"]["job_id"]) for job in LD_jobs]}} + for label in list( + mc["labels"].find( + combine_all_mongodb_filters( + { + "job_id": { + "$in": [int(job["slurm"]["job_id"]) for job in LD_jobs] + } + } + ) ) ): + # Remove MongoDB identifier, as we won't use it. + label.pop("_id") label_map.setdefault((label["user_id"], label["job_id"]), []).append(label) # Populate jobs with labels using job's user email and job ID to find related labels in labels dict. for job in LD_jobs: - job["job_labels"] = label_map.get( - (job["cw"]["mila_email_username"], int(job["slurm"]["job_id"])), [] - ) + key = (job["cw"]["mila_email_username"], int(job["slurm"]["job_id"])) + if key in label_map: + job["job_labels"] = label_map[key] # Set nbr_total_jobs if want_count: diff --git a/clockwork_web/templates/jobs_search.html b/clockwork_web/templates/jobs_search.html index 45be261e..fdc364a1 100644 --- a/clockwork_web/templates/jobs_search.html +++ b/clockwork_web/templates/jobs_search.html @@ -200,7 +200,7 @@

JOBS

{% if (web_settings | check_web_settings_column_display(page_name, "job_labels")) %} - {% for D_label in D_job['job_labels'] %} + {% for D_label in D_job.get('job_labels', []) %}

{{ D_label['name'] }} diff --git a/test_common/fake_data.py b/test_common/fake_data.py index 1017c554..cf3cf360 100644 --- a/test_common/fake_data.py +++ b/test_common/fake_data.py @@ -20,6 +20,15 @@ def fake_data(): ) with open(json_file, "r") as f: E = json.load(f) + + # Add labels to jobs + for job in E["jobs"]: + job_id = int(job["slurm"]["job_id"]) + user_id = job["cw"]["mila_email_username"] + for label in E["labels"]: + if label["job_id"] == job_id and label["user_id"] == user_id: + job.setdefault("job_labels", []).append(label) + mutate_some_job_status(E) return E @@ -99,6 +108,9 @@ def cleanup_function(): for e in E["gpu"]: db_insertion_point["gpu"].delete_many({"name": e["name"]}) + for e in E["labels"]: + db_insertion_point["labels"].delete_many({"name": e["name"]}) + for (k, sub, id_field) in [ ("jobs", "slurm", "job_id"), ("nodes", "slurm", "name"), diff --git a/test_common/jobs_test_helpers.py b/test_common/jobs_test_helpers.py index 5804a2ec..b66ac7af 100644 --- a/test_common/jobs_test_helpers.py +++ b/test_common/jobs_test_helpers.py @@ -34,11 +34,8 @@ def helper_single_job_at_random(fake_data, cluster_name): def validator(D_job): for k1 in original_D_job: - assert k1 in ["slurm", "cw", "user"] - for k2 in original_D_job[k1]: - assert ( - D_job[k1][k2] == original_D_job[k1][k2] - ), f"{D_job}\n{original_D_job}" + assert k1 in ["slurm", "cw", "user", "job_labels"] + assert D_job[k1] == original_D_job[k1], f"{D_job}\n{original_D_job}" return validator, job_id @@ -167,8 +164,7 @@ def validator(LD_jobs): # compare all the dicts one by one for (D_job, D_original_job) in zip(LD_jobs, LD_original_jobs): for k1 in D_original_job: - assert k1 in ["slurm", "cw", "user"] - for k2 in D_original_job[k1]: - assert D_job[k1][k2] == D_original_job[k1][k2] + assert k1 in ["slurm", "cw", "user", "job_labels"] + assert D_job[k1] == D_original_job[k1] return validator From 0221910d10061ae10c3b24bc55d749d4fe70740d Mon Sep 17 00:00:00 2001 From: notoraptor Date: Sun, 12 Nov 2023 18:10:29 -0500 Subject: [PATCH 05/19] Make sure to set job label to None when deselected --- clockwork_web/core/search_helper.py | 2 +- clockwork_web/templates/base.html | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clockwork_web/core/search_helper.py b/clockwork_web/core/search_helper.py index f0e55723..e46c2072 100644 --- a/clockwork_web/core/search_helper.py +++ b/clockwork_web/core/search_helper.py @@ -21,7 +21,7 @@ def parse_search_request(user, args, force_pagination=True): want_count = to_boolean(want_count) job_array = args.get("job_array", type=int, default=None) - job_label = args.get("job_label", type=str, default=None) + job_label = args.get("job_label", type=str, default=None) or None default_page_number = "1" if force_pagination else None diff --git a/clockwork_web/templates/base.html b/clockwork_web/templates/base.html index adee968c..57052268 100644 --- a/clockwork_web/templates/base.html +++ b/clockwork_web/templates/base.html @@ -24,7 +24,7 @@ - + @@ -339,7 +339,7 @@

+ Label "{{ previous_request_args['job_label'] }}"     From 391f004f2bbc85fcf644a8778d3ada0293ec6325 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Fri, 16 Feb 2024 09:41:11 -0500 Subject: [PATCH 06/19] Allow to display job-user dicts --- clockwork_web/browser_routes/jobs.py | 6 ++-- clockwork_web/core/jobs_helper.py | 21 +++++++----- clockwork_web/core/search_helper.py | 9 ++++-- clockwork_web/templates/base.html | 13 +++++--- clockwork_web/templates/jobs_search.html | 7 ++-- test_common/fake_data.json | 41 +++++++++++++++--------- test_common/fake_data.py | 11 ++++--- 7 files changed, 68 insertions(+), 40 deletions(-) diff --git a/clockwork_web/browser_routes/jobs.py b/clockwork_web/browser_routes/jobs.py index b5156f6f..b3dffbfb 100644 --- a/clockwork_web/browser_routes/jobs.py +++ b/clockwork_web/browser_routes/jobs.py @@ -101,7 +101,8 @@ def route_search(): - "sort_asc" is an optional integer and used to specify if sorting is ascending (1) or descending (-1). Default is 1. - "job_array" is optional and used to specify the job array in which we are looking for jobs - - "job_label" is optional and used to specify the label associated to jobs we are looking for + - "job_label_name" is optional and used to specify the label name associated to jobs we are looking for + - "job_label_content" is optional and used to specify the label value associated to jobs we are looking for .. :quickref: list all Slurm job as formatted html """ @@ -165,7 +166,8 @@ def route_search(): "sort_by": query.sort_by, "sort_asc": query.sort_asc, "job_array": query.job_array, - "job_label": query.job_label, + "job_label_name": query.job_label_name, + "job_label_content": query.job_label_content, }, ) diff --git a/clockwork_web/core/jobs_helper.py b/clockwork_web/core/jobs_helper.py index 96b6819b..fd9e58cd 100644 --- a/clockwork_web/core/jobs_helper.py +++ b/clockwork_web/core/jobs_helper.py @@ -161,7 +161,7 @@ def get_filtered_and_paginated_jobs( if LD_jobs: label_map = {} # Collect all labels related to found jobs, - # and store them in a dict with keys (user ID, job ID) + # and store them in a dict with keys (user ID, job ID, cluster_name) for label in list( mc["labels"].find( combine_all_mongodb_filters( @@ -175,10 +175,13 @@ def get_filtered_and_paginated_jobs( ): # Remove MongoDB identifier, as we won't use it. label.pop("_id") - label_map.setdefault((label["user_id"], label["job_id"]), []).append(label) - # Populate jobs with labels using job's user email and job ID to find related labels in labels dict. + key = (label["user_id"], label["job_id"], label["cluster_name"]) + assert key not in label_map + label_map[key] = label["labels"] + # Populate jobs with labels using job's user email, job ID and cluster name + # to find related labels in labels dict. for job in LD_jobs: - key = (job["cw"]["mila_email_username"], int(job["slurm"]["job_id"])) + key = (job["cw"]["mila_email_username"], int(job["slurm"]["job_id"]), job["slurm"]["cluster_name"]) if key in label_map: job["job_labels"] = label_map[key] @@ -260,7 +263,8 @@ def get_jobs( sort_by="submit_time", sort_asc=-1, job_array=None, - job_label=None, + job_label_name=None, + job_label_content=None, ): """ Set up the filters according to the parameters and retrieve the requested jobs from the database. @@ -278,7 +282,8 @@ def get_jobs( sort_asc Whether or not to sort in ascending order (1) or descending order (-1). job_array ID of job array in which we look for jobs. - job_label label (string) we must find in jobs to look for. 
+        job_label_name     name (string) of the label that the matching jobs must carry.
+        job_label_content  content (string) of the label that the matching jobs must carry.
 
     Returns:
         A tuple containing:
@@ -288,12 +293,12 @@ def get_jobs(
     """
     # If a job label is specified,
     # get job indices from jobs associated with this label.
-    if job_label is not None:
+    if job_label_name is not None and job_label_content is not None:
         mc = get_db()
         label_job_ids = [
             str(label["job_id"])
             for label in mc["labels"].find(
-                combine_all_mongodb_filters({"name": job_label})
+                combine_all_mongodb_filters(
+                    {f"labels.{job_label_name}": job_label_content}
+                )
             )
         ]
         if job_ids:
diff --git a/clockwork_web/core/search_helper.py b/clockwork_web/core/search_helper.py
index e46c2072..54e6a75c 100644
--- a/clockwork_web/core/search_helper.py
+++ b/clockwork_web/core/search_helper.py
@@ -21,7 +21,8 @@ def parse_search_request(user, args, force_pagination=True):
         want_count = to_boolean(want_count)
 
     job_array = args.get("job_array", type=int, default=None)
-    job_label = args.get("job_label", type=str, default=None) or None
+    job_label_name = args.get("job_label_name", type=str, default=None) or None
+    job_label_content = args.get("job_label_content", type=str, default=None) or None
 
     default_page_number = "1" if force_pagination else None
 
@@ -73,7 +74,8 @@
         sort_asc=sort_asc,
         want_count=want_count,
         job_array=job_array,
-        job_label=job_label,
+        job_label_name=job_label_name,
+        job_label_content=job_label_content,
     )
 
 
 #########################
@@ -119,6 +121,7 @@
         sort_by=query.sort_by,
         sort_asc=query.sort_asc,
         job_array=query.job_array,
-        job_label=query.job_label,
+        job_label_name=query.job_label_name,
+        job_label_content=query.job_label_content,
     )
     return (query, jobs, nbr_total_jobs)
diff --git a/clockwork_web/templates/base.html b/clockwork_web/templates/base.html
index 57052268..51d8c7d8 100644
--- a/clockwork_web/templates/base.html
+++ b/clockwork_web/templates/base.html
@@ -324,8 +324,11 @@

{% endif %} - {% if previous_request_args['job_label'] is not none %} - + {% if previous_request_args['job_label_name'] is not none %} + + {% endif %} + {% if previous_request_args['job_label_content'] is not none %} + {% endif %}
@@ -338,9 +341,9 @@

- Label "{{ previous_request_args['job_label'] }}"     + {% if previous_request_args['job_label_name'] is not none and previous_request_args['job_label_content'] is not none %} + + Label {{ previous_request_args['job_label_name'] }}: "{{ previous_request_args['job_label_content'] }}"     {% endif %} diff --git a/clockwork_web/templates/jobs_search.html b/clockwork_web/templates/jobs_search.html index fdc364a1..2bd20321 100644 --- a/clockwork_web/templates/jobs_search.html +++ b/clockwork_web/templates/jobs_search.html @@ -200,10 +200,11 @@

JOBS

{% if (web_settings | check_web_settings_column_display(page_name, "job_labels")) %} - {% for D_label in D_job.get('job_labels', []) %} + {% for D_label_name, D_label_content in D_job.get('job_labels', {}).items() %}

- - {{ D_label['name'] }} + + {{ D_label_name }}
+ {{ D_label_content }}

{% endfor %} diff --git a/test_common/fake_data.json b/test_common/fake_data.json index 30667bf9..0ffcbee2 100644 --- a/test_common/fake_data.json +++ b/test_common/fake_data.json @@ -5965,34 +5965,45 @@ ], "labels": [ { - "name": "je suis un label 1", "user_id": "student06@mila.quebec", - "job_id": 795002 + "job_id": 795002, + "cluster_name": "mila", + "labels": { + "name": "je suis un label 1" + } }, { - "name": "je suis un label 2", "user_id": "student16@mila.quebec", - "job_id": 606872 - }, - { - "name": "je suis un label 3", - "user_id": "student15@mila.quebec", - "job_id": 834395 + "job_id": 606872, + "cluster_name": "mila", + "labels": { + "name": "je suis un label 2" + } }, { - "name": "je suis un label 3", "user_id": "student15@mila.quebec", - "job_id": 154325 + "job_id": 834395, + "cluster_name": "graham", + "labels": { + "name": "je suis un label 3" + } }, { - "name": "je suis un label 4", "user_id": "student15@mila.quebec", - "job_id": 154325 + "job_id": 154325, + "cluster_name": "graham", + "labels": { + "name": "je suis un label 3", + "name2": "je suis un label 4" + } }, { - "name": "je suis un label 1", "user_id": "student12@mila.quebec", - "job_id": 613024 + "job_id": 613024, + "cluster_name": "graham", + "labels": { + "name": "je suis un label 1" + } } ] } diff --git a/test_common/fake_data.py b/test_common/fake_data.py index cf3cf360..bf14e314 100644 --- a/test_common/fake_data.py +++ b/test_common/fake_data.py @@ -25,9 +25,10 @@ def fake_data(): for job in E["jobs"]: job_id = int(job["slurm"]["job_id"]) user_id = job["cw"]["mila_email_username"] + cluster_name = job["slurm"]["cluster_name"] for label in E["labels"]: - if label["job_id"] == job_id and label["user_id"] == user_id: - job.setdefault("job_labels", []).append(label) + if label["job_id"] == job_id and label["user_id"] == user_id and label["cluster_name"] == cluster_name: + job["job_labels"] = label["labels"] mutate_some_job_status(E) return E @@ -80,7 +81,7 @@ def populate_fake_data(db_insertion_point, json_file=None, mutate=False): ) db_insertion_point["gpu"].create_index([("name", 1)], name="gpu_name") db_insertion_point["labels"].create_index( - [("user_id", 1), ("job_id", 1), ("name", 1)], name="job_label_index" + [("user_id", 1), ("job_id", 1), ("cluster_name", 1), ("labels", 1)], name="job_label_index" ) for k in ["users", "jobs", "nodes", "gpu", "labels"]: @@ -109,7 +110,9 @@ def cleanup_function(): db_insertion_point["gpu"].delete_many({"name": e["name"]}) for e in E["labels"]: - db_insertion_point["labels"].delete_many({"name": e["name"]}) + copy_e = e + copy_e.pop("labels") + db_insertion_point["labels"].delete_many(copy_e) for (k, sub, id_field) in [ ("jobs", "slurm", "job_id"), From eb751d5e42df65ff0ba598899b17b850f14899e3 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Fri, 16 Feb 2024 10:27:00 -0500 Subject: [PATCH 07/19] Format code. --- clockwork_web/core/jobs_helper.py | 10 ++++++++-- test_common/fake_data.py | 9 +++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/clockwork_web/core/jobs_helper.py b/clockwork_web/core/jobs_helper.py index fd9e58cd..f06e3f74 100644 --- a/clockwork_web/core/jobs_helper.py +++ b/clockwork_web/core/jobs_helper.py @@ -181,7 +181,11 @@ def get_filtered_and_paginated_jobs( # Populate jobs with labels using job's user email, job ID and cluster name # to find related labels in labels dict. 
for job in LD_jobs: - key = (job["cw"]["mila_email_username"], int(job["slurm"]["job_id"]), job["slurm"]["cluster_name"]) + key = ( + job["cw"]["mila_email_username"], + int(job["slurm"]["job_id"]), + job["slurm"]["cluster_name"], + ) if key in label_map: job["job_labels"] = label_map[key] @@ -298,7 +302,9 @@ def get_jobs( label_job_ids = [ str(label["job_id"]) for label in mc["labels"].find( - combine_all_mongodb_filters({f"labels.{job_label_name}": job_label_content}) + combine_all_mongodb_filters( + {f"labels.{job_label_name}": job_label_content} + ) ) ] if job_ids: diff --git a/test_common/fake_data.py b/test_common/fake_data.py index bf14e314..0677e267 100644 --- a/test_common/fake_data.py +++ b/test_common/fake_data.py @@ -27,7 +27,11 @@ def fake_data(): user_id = job["cw"]["mila_email_username"] cluster_name = job["slurm"]["cluster_name"] for label in E["labels"]: - if label["job_id"] == job_id and label["user_id"] == user_id and label["cluster_name"] == cluster_name: + if ( + label["job_id"] == job_id + and label["user_id"] == user_id + and label["cluster_name"] == cluster_name + ): job["job_labels"] = label["labels"] mutate_some_job_status(E) @@ -81,7 +85,8 @@ def populate_fake_data(db_insertion_point, json_file=None, mutate=False): ) db_insertion_point["gpu"].create_index([("name", 1)], name="gpu_name") db_insertion_point["labels"].create_index( - [("user_id", 1), ("job_id", 1), ("cluster_name", 1), ("labels", 1)], name="job_label_index" + [("user_id", 1), ("job_id", 1), ("cluster_name", 1), ("labels", 1)], + name="job_label_index", ) for k in ["users", "jobs", "nodes", "gpu", "labels"]: From c4d94eada256b71b21a37bedb226161400094ff1 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Mon, 19 Feb 2024 19:27:08 -0500 Subject: [PATCH 08/19] Improve get_jobs(): filter labels using current user in get_filtered_and_paginated_jobs() --- clockwork_web/core/jobs_helper.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/clockwork_web/core/jobs_helper.py b/clockwork_web/core/jobs_helper.py index f06e3f74..30f82354 100644 --- a/clockwork_web/core/jobs_helper.py +++ b/clockwork_web/core/jobs_helper.py @@ -7,6 +7,7 @@ import time from flask.globals import current_app +from flask_login import current_user from ..db import get_db @@ -168,7 +169,8 @@ def get_filtered_and_paginated_jobs( { "job_id": { "$in": [int(job["slurm"]["job_id"]) for job in LD_jobs] - } + }, + "user_id": current_user.mila_email_username, } ) ) @@ -178,16 +180,19 @@ def get_filtered_and_paginated_jobs( key = (label["user_id"], label["job_id"], label["cluster_name"]) assert key not in label_map label_map[key] = label["labels"] - # Populate jobs with labels using job's user email, job ID and cluster name - # to find related labels in labels dict. - for job in LD_jobs: - key = ( - job["cw"]["mila_email_username"], - int(job["slurm"]["job_id"]), - job["slurm"]["cluster_name"], - ) - if key in label_map: - job["job_labels"] = label_map[key] + + if label_map: + # Populate jobs with labels using job's user email, job ID and cluster name + # to find related labels in labels dict. 
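+            # (Only labels owned by the authenticated user were fetched above,
+            # so jobs belonging to other users simply get no "job_labels" field.)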
+ for job in LD_jobs: + key = ( + # job["cw"]["mila_email_username"], + current_user.mila_email_username, + int(job["slurm"]["job_id"]), + job["slurm"]["cluster_name"], + ) + if key in label_map: + job["job_labels"] = label_map[key] # Set nbr_total_jobs if want_count: From 105ecbd183b193718522cc82829a8d77c1d7531c Mon Sep 17 00:00:00 2001 From: notoraptor Date: Tue, 20 Feb 2024 07:27:31 -0500 Subject: [PATCH 09/19] Add a specific improved function _jobs_are_old(cluster_name) to tell if cluster jobs are old. Much faster to use than get_jobs(). --- clockwork_web/core/users_helper.py | 43 ++++++++++++++++-------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/clockwork_web/core/users_helper.py b/clockwork_web/core/users_helper.py index 86862e64..30ce7265 100644 --- a/clockwork_web/core/users_helper.py +++ b/clockwork_web/core/users_helper.py @@ -592,19 +592,30 @@ def render_template_with_user_settings(template_name_or_list, **context): # Get cluster status (if jobs are old and cluster has error). for cluster_name in context["clusters"]: - # Default status values. - jobs_are_old = False + # Cluster error cannot yet be checked, so + # cluster_has_error is always False for now. cluster_has_error = False + context["clusters"][cluster_name]["status"] = { + "jobs_are_old": _jobs_are_old(cluster_name), + "cluster_has_error": cluster_has_error, + } - # Check if jobs are old. - jobs, _ = get_jobs(cluster_names=[cluster_name]) - job_dates = [ - job["cw"]["last_slurm_update"] - for job in jobs - if "last_slurm_update" in job["cw"] - ] - if job_dates: - most_recent_job_edition = max(job_dates) + return render_template(template_name_or_list, **context) + + +def _jobs_are_old(cluster_name): + jobs_are_old = False + + mongodb_filter = {"slurm.cluster_name": cluster_name} + mc = get_db() + job_with_max_cw_last_slurm_update = list( + mc["jobs"].find(mongodb_filter).sort([("cw.last_slurm_update", -1)]).limit(1) + ) + + if job_with_max_cw_last_slurm_update: + (job,) = job_with_max_cw_last_slurm_update + if "last_slurm_update" in job["cw"]: + most_recent_job_edition = job["cw"]["last_slurm_update"] current_timestamp = datetime.now().timestamp() elapsed_time = timedelta( seconds=current_timestamp - most_recent_job_edition @@ -613,12 +624,4 @@ def render_template_with_user_settings(template_name_or_list, **context): max_delay = timedelta(days=30) jobs_are_old = elapsed_time > max_delay - # Cluster error cannot yet be checked, so - # cluster_has_error is always False for now. - - context["clusters"][cluster_name]["status"] = { - "jobs_are_old": jobs_are_old, - "cluster_has_error": cluster_has_error, - } - - return render_template(template_name_or_list, **context) + return jobs_are_old From d3b574e98938f9c2363f635449c88e763387ab96 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Tue, 20 Feb 2024 07:28:21 -0500 Subject: [PATCH 10/19] Add a specific script to populate database with huge fake data, adding about 1 000 000 jobs and job-user dicts. 
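
Sizing note, derived from the script below: each of the 20 fake users i
gets 2**i jobs, so the largest run inserts sum(2**i for i in range(20))
= 2**20 - 1 = 1048575 jobs, which is the "about 1 000 000" above, plus
optionally one job-user dict per job.

    >>> sum(2 ** i for i in range(20))
    1048575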
---
 scripts/store_huge_fake_data_in_db.py | 509 ++++++++++++++++++++++++++
 1 file changed, 509 insertions(+)
 create mode 100644 scripts/store_huge_fake_data_in_db.py

diff --git a/scripts/store_huge_fake_data_in_db.py b/scripts/store_huge_fake_data_in_db.py
new file mode 100644
index 00000000..c9471eae
--- /dev/null
+++ b/scripts/store_huge_fake_data_in_db.py
@@ -0,0 +1,509 @@
+"""
+Request-time variation as a function of the number of job-user dictionaries.
+For a fixed number of jobs = n:
+    0 to n dicts with 1 prop each
+    --nb-dicts
+    1 to k props for each of the n dicts
+    --nb-props
+Request-time variation as a function of the number of jobs in the DB:
+    With 0 dicts: 0 to n jobs
+    --nb-jobs
+    With n dicts of k props each: 0 to n jobs
+    --nb-jobs
+
+n = 19
+--nb-jobs: 0 to n => 2 ** 0 to 2 ** n
+--nb-dicts: 0 to n => 2 ** 0 to 2 ** n
+--nb-props: 1 to k
+
+Parameters:
+--nb-jobs --nb-dicts --nb-props-per-dict
+n 0 1
+n ... 1
+n n 1
+n n ...
+n n k
+
+0 0 1
+... 0 1
+n 0 1
+------------VS-----------
+0 n k
+... n k
+n n k
+"""
+
+import argparse
+import sys
+from datetime import datetime
+
+from clockwork_web.config import register_config
+from slurm_state.mongo_client import get_mongo_client
+from slurm_state.config import get_config
+
+USERS = [
+    {
+        "mila_email_username": "student00@mila.quebec",
+        "status": "enabled",
+        "clockwork_api_key": "000aaa00",
+        "mila_cluster_username": "milauser00",
+        "cc_account_username": "ccuser00",
+        "cc_account_update_key": None,
+        "web_settings": {
+            "nbr_items_per_page": 40,
+            "dark_mode": False,
+            "language": "en",
+        },
+    },
+    {
+        "mila_email_username": "student01@mila.quebec",
+        "status": "enabled",
+        "clockwork_api_key": "000aaa01",
+        "mila_cluster_username": "milauser01",
+        "cc_account_username": "ccuser01",
+        "cc_account_update_key": None,
+        "web_settings": {
+            "nbr_items_per_page": 40,
+            "dark_mode": False,
+            "language": "fr",
+        },
+    },
+    {
+        "mila_email_username": "student02@mila.quebec",
+        "status": "enabled",
+        "clockwork_api_key": "000aaa02",
+        "mila_cluster_username": "milauser02",
+        "cc_account_username": "ccuser02",
+        "cc_account_update_key": None,
+        "web_settings": {
+            "nbr_items_per_page": 40,
+            "dark_mode": False,
+            "language": "en",
+        },
+    },
+    {
+        "mila_email_username": "student03@mila.quebec",
+        "status": "enabled",
+        "clockwork_api_key": "000aaa03",
+        "mila_cluster_username": "milauser03",
+        "cc_account_username": "ccuser03",
+        "cc_account_update_key": None,
+        "web_settings": {
+            "nbr_items_per_page": 40,
+            "dark_mode": False,
+            "language": "fr",
+        },
+    },
+    {
+        "mila_email_username": "student04@mila.quebec",
+        "status": "enabled",
+        "clockwork_api_key": "000aaa04",
+        "mila_cluster_username": "milauser04",
+        "cc_account_username": "ccuser04",
+        "cc_account_update_key": None,
+        "web_settings": {
+            "nbr_items_per_page": 40,
+            "dark_mode": False,
+            "language": "en",
+        },
+    },
+    {
+        "mila_email_username": "student05@mila.quebec",
+        "status": "enabled",
+        "clockwork_api_key": "000aaa05",
+        "mila_cluster_username": "milauser05",
+        "cc_account_username": "ccuser05",
+        "cc_account_update_key": None,
+        "web_settings": {
+            "nbr_items_per_page": 40,
+            "dark_mode": False,
+            "language": "fr",
+        },
+    },
+    {
+        "mila_email_username": "student06@mila.quebec",
+        "status": "enabled",
+        "clockwork_api_key": "000aaa06",
+        "mila_cluster_username": "milauser06",
+        "cc_account_username": None,
+        "cc_account_update_key": None,
+        "web_settings": {
+            "nbr_items_per_page": 40,
+            "dark_mode": False,
+            "language": "en",
+        },
+    },
+    {
+        "mila_email_username": 
"student07@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa07", + "mila_cluster_username": "milauser07", + "cc_account_username": "ccuser07", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, + { + "mila_email_username": "student08@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa08", + "mila_cluster_username": "milauser08", + "cc_account_username": "ccuser08", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "en", + }, + }, + { + "mila_email_username": "student09@mila.quebec", + "status": "disabled", + "clockwork_api_key": "000aaa09", + "mila_cluster_username": "milauser09", + "cc_account_username": "ccuser09", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, + { + "mila_email_username": "student10@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa10", + "mila_cluster_username": "milauser10", + "cc_account_username": "ccuser10", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "en", + }, + }, + { + "mila_email_username": "student11@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa11", + "mila_cluster_username": "milauser11", + "cc_account_username": "ccuser11", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, + { + "mila_email_username": "student12@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa12", + "mila_cluster_username": "milauser12", + "cc_account_username": "ccuser12", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "en", + }, + }, + { + "mila_email_username": "student13@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa13", + "mila_cluster_username": "milauser13", + "cc_account_username": "ccuser13", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, + { + "mila_email_username": "student14@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa14", + "mila_cluster_username": "milauser14", + "cc_account_username": "ccuser14", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "en", + }, + }, + { + "mila_email_username": "student15@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa15", + "mila_cluster_username": "milauser15", + "cc_account_username": "ccuser15", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, + { + "mila_email_username": "student16@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa16", + "mila_cluster_username": "milauser16", + "cc_account_username": "ccuser16", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "en", + }, + }, + { + "mila_email_username": "student17@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa17", + "mila_cluster_username": "milauser17", + "cc_account_username": "ccuser17", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, + { + "mila_email_username": 
"student18@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa18", + "mila_cluster_username": "milauser18", + "cc_account_username": "ccuser18", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "en", + }, + }, + { + "mila_email_username": "student19@mila.quebec", + "status": "disabled", + "clockwork_api_key": "000aaa19", + "mila_cluster_username": "milauser19", + "cc_account_username": "ccuser19", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, +] +BASE_JOB_SLURM = { + "account": "def-patate-rrg", + "cluster_name": "beluga", + "time_limit": 4320, + "submit_time": 1681680327, + "start_time": 0, + "end_time": 0, + "exit_code": "SUCCESS:0", + "array_job_id": "0", + "array_task_id": "None", + "job_id": "197775", + "name": "somejobname_507716", + "nodes": "None assigned", + "partition": "other_fun_partition", + "job_state": "PENDING", + "tres_allocated": {}, + "tres_requested": { + "num_cpus": 80, + "mem": 95000, + "num_nodes": 1, + "billing": 80, + }, + "username": "ccuser02", + "working_directory": "/a809/b333/c569", +} +BASE_JOB_CW = { + "mila_email_username": "student02@mila.quebec", + "last_slurm_update": 1686248596.476063, + "last_slurm_update_by_sacct": 1686248596.476063, +} +BASE_USER_JOB_DICT = { + "user_id": "student00@mila.quebec", + "job_id": 795002, + "cluster_name": "beluga", + "labels": {f"name_{i + 1}": f"i am a label {i + 1}" for i in range(4)}, +} + + +def _generate_huge_fake_data(with_labels=False): + nb_jobs_per_user = [2**i for i in range(len(USERS))] + assert len(nb_jobs_per_user) == len(USERS) + nb_user_job_dicts = sum(nb_jobs_per_user) + jobs = [] + labels = [] + + # populate jobs + job_id = 0 + for user, nb_user_jobs in zip(USERS, nb_jobs_per_user): + for i in range(nb_user_jobs): + job_id += 1 + job_slurm = BASE_JOB_SLURM.copy() + job_cw = BASE_JOB_CW.copy() + # edit slurm.job_id + job_slurm["job_id"] = str(job_id) + # edit slurm.name + job_slurm["name"] = f"job_name_{job_id}" + # edit slurm.username + job_slurm["username"] = user["cc_account_username"] + # edit cw.mila_email_username + job_cw["mila_email_username"] = user["mila_email_username"] + jobs.append({"slurm": job_slurm, "cw": job_cw, "user": {}}) + print("Nb. jobs:", job_id) + assert job_id == nb_user_job_dicts + + if with_labels: + # populate labels + for i in range(nb_user_job_dicts): + user_job_dict = BASE_USER_JOB_DICT.copy() + # edit job_id + user_job_dict["job_id"] = i + 1 + labels.append(user_job_dict) + + return {"users": USERS, "jobs": jobs, "labels": labels} + + +def populate_fake_data(db_insertion_point, labels=False): + print("Generating huge fake data") + E = _generate_huge_fake_data(with_labels=labels) + print("Generated huge fake data") + + # Create indices. This isn't half as important as when we're + # dealing with large quantities of data, but it's part of the + # set up for the database. 
+ db_insertion_point["jobs"].create_index( + [("slurm.job_id", 1), ("slurm.cluster_name", 1)], + name="job_id_and_cluster_name", + ) + db_insertion_point["nodes"].create_index( + [("slurm.name", 1), ("slurm.cluster_name", 1)], + name="name_and_cluster_name", + ) + db_insertion_point["users"].create_index( + [("mila_email_username", 1)], name="users_email_index" + ) + db_insertion_point["gpu"].create_index([("name", 1)], name="gpu_name") + db_insertion_point["labels"].create_index( + [("user_id", 1), ("job_id", 1), ("cluster_name", 1), ("labels", 1)], + name="job_label_index", + ) + + for k in ["users", "jobs", "nodes", "gpu", "labels"]: + if k in E and E[k]: + print("Inserting", k) + db_insertion_point[k].insert_many(E[k]) + print("Inserted", k) + + def cleanup_function(): + """ + Each of those kinds of data is identified in a unique way, + and we can use that identifier to clean up. + + For example, when clearing out jobs, we can look at the "job_id" + of the entries that we inserted. + + The point is that we can run a test against the production mongodb on Atlas + and not affect the real data. If we cleared the tables completely, + then we'd be affecting the real data in a bad way. + """ + for e in E["users"]: + db_insertion_point["users"].delete_many( + {"mila_email_username": e["mila_email_username"]} + ) + + for e in E["gpu"]: + db_insertion_point["gpu"].delete_many({"name": e["name"]}) + + for e in E["labels"]: + copy_e = e + copy_e.pop("labels") + db_insertion_point["labels"].delete_many(copy_e) + + for (k, sub, id_field) in [ + ("jobs", "slurm", "job_id"), + ("nodes", "slurm", "name"), + ]: + if k in E: + for e in E[k]: + # This is complicated, but it's just about a way to say something like + # that we want to remove {"slurm.job_id", e["slurm"]["job_id"]}, + # and the weird notation comes from the fact that mongodb filters use dots, + # but not the original python. + db_insertion_point[k].delete_many( + {f"{sub}.{id_field}": e[sub][id_field]} + ) + + return cleanup_function + + +def store_data_in_db(labels=False): + # Open the database and insert the contents. + client = get_mongo_client() + populate_fake_data(client[get_config("mongo.database_name")], labels=labels) + + +def modify_timestamps(data): + """ + This function updates the timestamps in order to simulate jobs which have + been launched more recently than they were. + """ + # Retrieve the most recent timestamp (ie its end_time) + most_recent_timestamp = data["jobs"][0]["slurm"]["end_time"] + # most_recent_timestamp = min(job["slurm"]["end_time"] for job in data["jobs"]) + for job in data["jobs"]: + new_end_time = job["slurm"]["end_time"] + if new_end_time: + if new_end_time > most_recent_timestamp: + most_recent_timestamp = new_end_time + + # Retrieve the time interval between this timestamp and now + time_delta = datetime.now().timestamp() - most_recent_timestamp + + # Substract it to the timestamps of the jobs + for job in data["jobs"]: + if job["slurm"]["submit_time"]: + job["slurm"]["submit_time"] += time_delta + if job["slurm"]["start_time"]: + job["slurm"]["start_time"] += time_delta + if job["slurm"]["end_time"]: + job["slurm"]["end_time"] += time_delta + + +def main(argv): + # Retrieve the arguments passed to the script + parser = argparse.ArgumentParser() + parser.add_argument( + "--labels", + type=bool, + action=argparse.BooleanOptionalAction, + default=False, + help="Add fake job-user dicts. 
If False (default), only add jobs.", + ) + args = parser.parse_args(argv[1:]) + print(args) + + # Register the elements to access the database + register_config("mongo.connection_string", "") + register_config("mongo.database_name", "clockwork") + + # Store the generated fake data in the database + store_data_in_db(labels=args.labels) + + +if __name__ == "__main__": + main(sys.argv) From 4f6459dc09e07e6e8027fbf6ee8b70dd35834d14 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Tue, 20 Feb 2024 07:36:58 -0500 Subject: [PATCH 11/19] Make job-user dicts more specific to each job. --- scripts/store_huge_fake_data_in_db.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/scripts/store_huge_fake_data_in_db.py b/scripts/store_huge_fake_data_in_db.py index c9471eae..2b901a65 100644 --- a/scripts/store_huge_fake_data_in_db.py +++ b/scripts/store_huge_fake_data_in_db.py @@ -333,12 +333,6 @@ "last_slurm_update": 1686248596.476063, "last_slurm_update_by_sacct": 1686248596.476063, } -BASE_USER_JOB_DICT = { - "user_id": "student00@mila.quebec", - "job_id": 795002, - "cluster_name": "beluga", - "labels": {f"name_{i + 1}": f"i am a label {i + 1}" for i in range(4)}, -} def _generate_huge_fake_data(with_labels=False): @@ -370,9 +364,15 @@ def _generate_huge_fake_data(with_labels=False): if with_labels: # populate labels for i in range(nb_user_job_dicts): - user_job_dict = BASE_USER_JOB_DICT.copy() - # edit job_id - user_job_dict["job_id"] = i + 1 + user_job_dict = { + "user_id": "student00@mila.quebec", + "job_id": i + 1, + "cluster_name": "beluga", + "labels": { + f"prop_{j + 1}_for_job_{i + 1}": f"I am user dict prop {j + 1} for job ID {i + 1}" + for j in range(4) + }, + } labels.append(user_job_dict) return {"users": USERS, "jobs": jobs, "labels": labels} From ebd4ddf06a6b2a431dbeff5632301f48ed55f5f3 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Mon, 26 Feb 2024 09:40:40 -0500 Subject: [PATCH 12/19] Script store_huge_fake_data_in_db: add command-line arguments --nb-jobs, --nb-dicts, --nb-props-per-dict Add scripts for benchmarking --- scripts/gen_job_request_benchmark_script.py | 51 +++++ scripts/job_request_benchmark.py | 201 ++++++++++++++++++++ scripts/plot_job_request_benchmark.py | 144 ++++++++++++++ scripts/store_huge_fake_data_in_db.py | 151 ++++++++------- 4 files changed, 480 insertions(+), 67 deletions(-) create mode 100644 scripts/gen_job_request_benchmark_script.py create mode 100644 scripts/job_request_benchmark.py create mode 100644 scripts/plot_job_request_benchmark.py diff --git a/scripts/gen_job_request_benchmark_script.py b/scripts/gen_job_request_benchmark_script.py new file mode 100644 index 00000000..b7621cd8 --- /dev/null +++ b/scripts/gen_job_request_benchmark_script.py @@ -0,0 +1,51 @@ +import sys + +# max: sum(2**i for i in range(n)) jobs +# max: sum(2**i for i in range(n)) dicts +N = 17 +Ks = (1, 10, 100) + +NB_REQUESTS = 10 + + +def main(): + if len(sys.argv) == 2: + wd = sys.argv[1] + else: + wd = "local" + + print("set -eu") + + for nb_props_per_dict in Ks: + for nb_dicts in range(N + 1): + gen_commands(N, nb_dicts, nb_props_per_dict, wd) + + for nb_jobs in range(N): + gen_commands(nb_jobs, 0, 1, wd) + + for nb_props_per_dict in Ks: + for nb_jobs in range(N): + gen_commands(nb_jobs, N, nb_props_per_dict, wd) + + +def gen_commands(nb_jobs, nb_dicts, nb_props_per_dict, working_directory): + task_name = f"jobs-{nb_jobs:02}_dicts-{nb_dicts:02}_props-{nb_props_per_dict:02}" + + cmd_fake_data = ( + f"python3 scripts/store_huge_fake_data_in_db.py 
" + f"--nb-jobs {nb_jobs} " + f"--nb-dicts {nb_dicts} " + f"--nb-props-per-dict {nb_props_per_dict}" + ) + cmd_benchmark = ( + f"python3 scripts/job_request_benchmark.py " + f"--config {working_directory}/config.json " + f"--nb-requests {NB_REQUESTS} " + f"--output {task_name}" + ) + print(cmd_fake_data) + print(cmd_benchmark) + + +if __name__ == "__main__": + main() diff --git a/scripts/job_request_benchmark.py b/scripts/job_request_benchmark.py new file mode 100644 index 00000000..b324b8e7 --- /dev/null +++ b/scripts/job_request_benchmark.py @@ -0,0 +1,201 @@ +import os + +import argparse +import sys +import logging +import time +from datetime import datetime +from collections import namedtuple +import json + +try: + from clockwork_tools.client import ClockworkToolsClient +except Exception: + print( + "Clockwork tools needed. You can install it with `cd clockwork_tools` then `pip install -e .`" + ) + raise + + +log_format = "%(levelname)s:%(name)s:%(asctime)s: %(message)s" +logging.basicConfig(level=logging.INFO, format=log_format) + +logger = logging.getLogger("server_benchmark") + + +class CallStat( + namedtuple( + "CallStat", ("username", "nb_jobs", "pt_start", "pt_end", "pc_start", "pc_end") + ) +): + """ + Class to collect stats and time for 1 request. + + Python provides 2 precision functions for profiling: + - time.process_time_ns(): only process time, does not include sleep times. + - time.perf_counter_ns(): includes sleep times. + + I made a mistake in previous commits because I measured requests using + process_time(). Thus, request times looked very small, as they don't + include sleeps, which are used to wait for server response. + + So, I decided to measure both process time and full (perf_counter) time + to check how they differ: + - process time is still very small (less than 0.10 seconds) + and correctly approximated with a linear regression wr/t nunber of jobs. + - full time (perf_counter) is very much higher, sometimes up to 10 seconds, + and way more irregular (badly approximated with linear regression). + + In practice, I guess the relevant measure is full time (with perf_counter), + as it correctly represents how much time user could wait to get response + ** if he gets all jobs at once without pagination **. 
+ """ + + @property + def pt_nanoseconds(self): + """Duration measured with process time.""" + return self.pt_end - self.pt_start + + @property + def pc_nanoseconds(self): + """Duration measured with perf counter (full duration).""" + return self.pc_end - self.pc_start + + def summary(self): + return { + "nb_jobs": self.nb_jobs, + "pc_nanoseconds": self.pc_nanoseconds, + } + + +class BenchmarkClient(ClockworkToolsClient): + """Client with a specific method for profiling.""" + + def profile_getting_user_jobs(self, username: str = None) -> CallStat: + """Profile a request `jobs/list` with given username and return a CallStat.""" + pc_start = time.perf_counter_ns() + pt_start = time.process_time_ns() + jobs = self.jobs_list(username) + pt_end = time.process_time_ns() + pc_end = time.perf_counter_ns() + return CallStat( + username=username, + nb_jobs=len(jobs), + pc_start=pc_start, + pc_end=pc_end, + pt_start=pt_start, + pt_end=pt_end, + ) + + +def main(): + argv = sys.argv + parser = argparse.ArgumentParser( + prog=argv[0], + formatter_class=argparse.RawTextHelpFormatter, + ) + parser.add_argument("-a", "--address", help="Server host.") + parser.add_argument("-p", "--port", type=int, default=443, help="Server port.") + parser.add_argument( + "--config", + type=str, + help=( + "Optional JSON configuration file to use for benchmarking. " + "If not specified, use --address, --port, and OS environment variables for clockwork api key and email. " + "If file exists, ignore --address, --port and OS variables, and read config from file. " + "If file does not exist, create file with config values from --address, --port and OS variables. " + "Configuration file must contain a dictionary with keys " + "'address' (str), 'port` (int), 'api_key` (str), 'email' (str)." + ), + ) + parser.add_argument( + "-n", + "--nb-requests", + type=int, + default=10, + help="Number of requests to send (default, 10).", + ) + parser.add_argument( + "-o", + "--output", + type=str, + required=True, + help="Benchmark name, used to save stats on disk. " + "Saved in /.json", + ) + args = parser.parse_args(argv[1:]) + print("Arguments:", args) + + if args.nb_requests < 1: + logger.error(f"No positive time specified for benchmarking, exit.") + sys.exit(1) + + bench_date = datetime.now() + config_path = None + working_directory = "." + if args.config: + config_path = os.path.abspath(args.config) + working_directory = os.path.dirname(config_path) + # Save next log messages into a file. + log_formatter = logging.Formatter(log_format) + log_path = os.path.join(working_directory, f"bench_{args.output}.log") + logger.info(f"Saving log in: {log_path}") + file_handler = logging.FileHandler(log_path) + file_handler.setFormatter(log_formatter) + logger.addHandler(file_handler) + + if config_path and os.path.isfile(config_path): + # Read config file if available. + with open(config_path) as file: + config = json.load(file) + address = config["address"] + port = config["port"] + api_key = config["api_key"] + email = config["email"] + logger.info(f"Loaded config from file: address: {address}, port: {port}") + else: + address = args.address + port = args.port + # API key and email will be retrieved from OS environment in client constructor. + api_key = None + email = None + if not address: + logger.error( + "Either --address or --config (with existing file) is required." 
+ ) + sys.exit(1) + + client = BenchmarkClient( + host=address, port=port, clockwork_api_key=api_key, email=email + ) + + output = [] + for i in range(args.nb_requests): + cs = client.profile_getting_user_jobs() + logger.info( + f"[{i + 1}] Sent request for username in {cs.pc_nanoseconds / 1e9} seconds, " + f"received {cs.nb_jobs} jobs." + ) + output.append(cs.summary()) + + if config_path and not os.path.exists(config_path): + # If args.config is defined, we save config file if args.config does not exist. + config = { + "address": client.host, + "port": client.port, + "api_key": client.clockwork_api_key, + "email": client.email, + } + with open(config_path, "w") as file: + json.dump(config, file) + logger.info(f"Saved config file at: {config_path}") + + output_path = os.path.join(working_directory, f"{args.output}.json") + with open(output_path, "w") as file: + json.dump(output, file) + logger.info(f"Saved stats at: {output_path}") + logger.info("End.") + + +if __name__ == "__main__": + main() diff --git a/scripts/plot_job_request_benchmark.py b/scripts/plot_job_request_benchmark.py new file mode 100644 index 00000000..766751b4 --- /dev/null +++ b/scripts/plot_job_request_benchmark.py @@ -0,0 +1,144 @@ +import os +import sys +import json + + +try: + import matplotlib.pyplot as plt + # plt.figure(figure=(10.8, 7.2), dpi=100) +except Exception: + print( + "Matplotlib needed. You can install it with `pip install matplotlib`", + file=sys.stderr, + ) + raise + + +def main(): + if len(sys.argv) != 2: + print("Missing stats folder", file=sys.stderr) + sys.exit(1) + + # Get stat files. + folder = sys.argv[1] + stats_file_names = [] + for name in os.listdir(folder): + if name.startswith("jobs-") and name.endswith(".json"): + stats_file_names.append(name) + + # Get stat data. 
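+    # File names follow the generator script's naming scheme,
+    # e.g. "jobs-17_dicts-00_props-01.json"; the three counts are
+    # parsed back out of the name below.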
+ stats = {} + nbs_jobs = [] + nbs_dicts = [] + nbs_props = [] + for name in sorted(stats_file_names): + title, extension = name.split(".") + jobs_info, dicts_info, props_info = title.split("_") + _, nb_jobs = jobs_info.split("-") + _, nb_dicts = dicts_info.split("-") + _, nb_props_per_dict = props_info.split("-") + nb_jobs = int(nb_jobs) + nb_dicts = int(nb_dicts) + nb_props_per_dict = int(nb_props_per_dict) + with open(os.path.join(folder, name)) as file: + local_stats = json.load(file) + assert len({stat["nb_jobs"] for stat in local_stats}) == 1 + durations = sorted(stat["pc_nanoseconds"] for stat in local_stats) + stats[(nb_jobs, nb_dicts, nb_props_per_dict)] = durations + nbs_jobs.append(nb_jobs) + nbs_dicts.append(nb_dicts) + nbs_props.append(nb_props_per_dict) + + assert max(nbs_jobs) == max(nbs_dicts) + N = max(nbs_jobs) + Ks = sorted(set(nbs_props)) + + _plot_request_time_per_nb_dicts(stats, N, Ks, folder) + _plots_request_time_per_nb_jobs(stats, N, Ks, folder) + + +def _plot_request_time_per_nb_dicts(stats: dict, N: int, Ks: list, folder: str): + x_nb_dicts = [_compute_nb_jobs(n) for n in range(N + 1)] + y_time = {nb_props: [] for nb_props in Ks} + + for nb_props in Ks: + print() + for nb_dicts in range(N + 1): + key = (N, nb_dicts, nb_props) + average_duration = _debug_average_seconds(key, stats[key]) + y_time[nb_props].append(average_duration) + + fig, ax = plt.subplots() + for nb_props in Ks: + ax.plot( + x_nb_dicts, + y_time[nb_props], + marker='o', + label=f"{_compute_nb_jobs(N)} jobs in DB, {nb_props} prop(s) per dict", + ) + ax.set_title("Request duration per number of job-user dicts") + ax.set_xlabel("Number of job-user dicts in DB") + ax.set_ylabel("Request duration in seconds") + ax.legend() + plot_path = os.path.join( + folder, + f"nb_dicts_to_time_for_{_compute_nb_jobs(N)}_jobs.jpg", + ) + plt.gcf().set_size_inches(20, 10) + plt.savefig(plot_path, bbox_inches="tight") + plt.close(fig) + + +def _plots_request_time_per_nb_jobs(stats: dict, N: int, Ks: list, folder: str): + x_nb_jobs = [_compute_nb_jobs(n) for n in range(N + 1)] + y_time_0_dicts_1_props = [] + y_time_N_dicts = {nb_props: [] for nb_props in Ks} + + print() + for nb_jobs in range(N + 1): + key = (nb_jobs, 0, 1) + average_duration = _debug_average_seconds(key, stats[key]) + y_time_0_dicts_1_props.append(average_duration) + print() + for nb_props in Ks: + for nb_jobs in range(N + 1): + key = (nb_jobs, N, nb_props) + average_duration = _debug_average_seconds(key, stats[key]) + y_time_N_dicts[nb_props].append(average_duration) + + fig, ax = plt.subplots() + ax.plot(x_nb_jobs, y_time_0_dicts_1_props, marker='o', label=f"0 job-user dicts in DB") + for nb_props in Ks: + ax.plot( + x_nb_jobs, + y_time_N_dicts[nb_props], + marker='o', + label=f"{_compute_nb_jobs(N)} job-user dicts in DB, {nb_props} props per dict", + ) + ax.set_title("Request duration per number of jobs") + ax.set_xlabel("Number of jobs in DB") + ax.set_ylabel("Request duration in seconds") + ax.legend() + plot_path = os.path.join(folder, f"nb_jobs_to_time.jpg") + plt.gcf().set_size_inches(20, 10) + plt.savefig(plot_path, bbox_inches="tight") + plt.close(fig) + + +def _compute_nb_jobs(n: int): + return sum(2**i for i in range(n)) + + +def _debug_average_seconds(key, durations): + nb_jobs, nb_dicts, nb_props = key + avg = sum(durations) / (len(durations) * 1e9) + print( + f"jobs {nb_jobs:02} dicts {nb_dicts:02} props {nb_props:02}", + avg, + [d / 1e9 for d in durations], + ) + return avg + + +if __name__ == "__main__": + main() diff --git 
a/scripts/store_huge_fake_data_in_db.py b/scripts/store_huge_fake_data_in_db.py index 2b901a65..5f7f4c7d 100644 --- a/scripts/store_huge_fake_data_in_db.py +++ b/scripts/store_huge_fake_data_in_db.py @@ -335,52 +335,65 @@ } -def _generate_huge_fake_data(with_labels=False): - nb_jobs_per_user = [2**i for i in range(len(USERS))] - assert len(nb_jobs_per_user) == len(USERS) - nb_user_job_dicts = sum(nb_jobs_per_user) +DEFAULT_NB_JOBS = len(USERS) +DEFAULT_NB_DICTS = DEFAULT_NB_JOBS +DEFAULT_NB_PROPS_PER_DICT = 4 + + +def _generate_huge_fake_data( + nb_jobs=DEFAULT_NB_JOBS, + nb_dicts=DEFAULT_NB_DICTS, + nb_props_per_dict=DEFAULT_NB_PROPS_PER_DICT, +): jobs = [] - labels = [] + job_user_dicts = [] # populate jobs - job_id = 0 - for user, nb_user_jobs in zip(USERS, nb_jobs_per_user): - for i in range(nb_user_jobs): - job_id += 1 - job_slurm = BASE_JOB_SLURM.copy() - job_cw = BASE_JOB_CW.copy() - # edit slurm.job_id - job_slurm["job_id"] = str(job_id) - # edit slurm.name - job_slurm["name"] = f"job_name_{job_id}" - # edit slurm.username - job_slurm["username"] = user["cc_account_username"] - # edit cw.mila_email_username - job_cw["mila_email_username"] = user["mila_email_username"] - jobs.append({"slurm": job_slurm, "cw": job_cw, "user": {}}) - print("Nb. jobs:", job_id) - assert job_id == nb_user_job_dicts - - if with_labels: - # populate labels - for i in range(nb_user_job_dicts): + if nb_jobs: + assert 1 <= nb_jobs <= len(USERS) + nb_jobs_per_user = [2**i for i in range(nb_jobs)] + assert len(nb_jobs_per_user) == nb_jobs + job_id = 0 + for user, nb_user_jobs in zip(USERS[:nb_jobs], nb_jobs_per_user): + for i in range(nb_user_jobs): + job_id += 1 + job_slurm = BASE_JOB_SLURM.copy() + job_cw = BASE_JOB_CW.copy() + # edit slurm.job_id + job_slurm["job_id"] = str(job_id) + # edit slurm.name + job_slurm["name"] = f"job_name_{job_id}" + # edit slurm.username + job_slurm["username"] = user["cc_account_username"] + # edit cw.mila_email_username + job_cw["mila_email_username"] = user["mila_email_username"] + jobs.append({"slurm": job_slurm, "cw": job_cw, "user": {}}) + print("Nb. jobs:", job_id) + assert job_id == sum(nb_jobs_per_user) + + # populate job-user-dicts + if nb_dicts: + real_nb_dicts = sum(2**i for i in range(nb_dicts)) + for i in range(real_nb_dicts): user_job_dict = { "user_id": "student00@mila.quebec", "job_id": i + 1, "cluster_name": "beluga", "labels": { f"prop_{j + 1}_for_job_{i + 1}": f"I am user dict prop {j + 1} for job ID {i + 1}" - for j in range(4) + for j in range(nb_props_per_dict) }, } - labels.append(user_job_dict) + job_user_dicts.append(user_job_dict) + print("Nb. dicts:", real_nb_dicts) + print("NB. props per dict:", nb_props_per_dict) - return {"users": USERS, "jobs": jobs, "labels": labels} + return {"users": USERS, "jobs": jobs, "labels": job_user_dicts} -def populate_fake_data(db_insertion_point, labels=False): +def populate_fake_data(db_insertion_point, **kwargs): print("Generating huge fake data") - E = _generate_huge_fake_data(with_labels=labels) + E = _generate_huge_fake_data(**kwargs) print("Generated huge fake data") # Create indices. 
This isn't half as important as when we're @@ -404,9 +417,14 @@ def populate_fake_data(db_insertion_point, labels=False): ) for k in ["users", "jobs", "nodes", "gpu", "labels"]: + # Anyway clean before inserting + db_insertion_point[k].delete_many({}) if k in E and E[k]: print("Inserting", k) + # Then insert db_insertion_point[k].insert_many(E[k]) + # And check count + assert db_insertion_point[k].count_documents({}) == len(E[k]) print("Inserted", k) def cleanup_function(): @@ -451,48 +469,43 @@ def cleanup_function(): return cleanup_function -def store_data_in_db(labels=False): +def store_data_in_db(**kwargs): # Open the database and insert the contents. client = get_mongo_client() - populate_fake_data(client[get_config("mongo.database_name")], labels=labels) - - -def modify_timestamps(data): - """ - This function updates the timestamps in order to simulate jobs which have - been launched more recently than they were. - """ - # Retrieve the most recent timestamp (ie its end_time) - most_recent_timestamp = data["jobs"][0]["slurm"]["end_time"] - # most_recent_timestamp = min(job["slurm"]["end_time"] for job in data["jobs"]) - for job in data["jobs"]: - new_end_time = job["slurm"]["end_time"] - if new_end_time: - if new_end_time > most_recent_timestamp: - most_recent_timestamp = new_end_time - - # Retrieve the time interval between this timestamp and now - time_delta = datetime.now().timestamp() - most_recent_timestamp - - # Substract it to the timestamps of the jobs - for job in data["jobs"]: - if job["slurm"]["submit_time"]: - job["slurm"]["submit_time"] += time_delta - if job["slurm"]["start_time"]: - job["slurm"]["start_time"] += time_delta - if job["slurm"]["end_time"]: - job["slurm"]["end_time"] += time_delta + populate_fake_data(client[get_config("mongo.database_name")], **kwargs) def main(argv): # Retrieve the arguments passed to the script - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--nb-jobs", + type=int, + default=DEFAULT_NB_JOBS, + help="Number of users for which to add jobs. " + "Control the number of jobs in database by generating " + "2**i jobs for each user i from user <0> to user . " + "If 0, no jobs are added. " + f"Default is {DEFAULT_NB_JOBS}, for all users available, ie. " + f"{sum(2**i for i in range(DEFAULT_NB_JOBS))} total jobs.", + ) + parser.add_argument( + "--nb-dicts", + type=int, + default=DEFAULT_NB_DICTS, + help="Control the number of job-user dicts in database by generating " + "sum(2**i for i in range(nb-dicts)) dictionaries. " + "If 0, no dicts are added. " + f"Default is {DEFAULT_NB_DICTS} to match the maximum number of potential jobs, ie. " + f"{sum(2**i for i in range(DEFAULT_NB_DICTS))} total dicts.", + ) parser.add_argument( - "--labels", - type=bool, - action=argparse.BooleanOptionalAction, - default=False, - help="Add fake job-user dicts. 
If False (default), only add jobs.", + "--nb-props-per-dict", + type=int, + default=DEFAULT_NB_PROPS_PER_DICT, + help=f"Number of key-value pairs in each job-user dict.", ) args = parser.parse_args(argv[1:]) print(args) @@ -502,7 +515,11 @@ def main(argv): register_config("mongo.database_name", "clockwork") # Store the generated fake data in the database - store_data_in_db(labels=args.labels) + store_data_in_db( + nb_jobs=args.nb_jobs, + nb_dicts=args.nb_dicts, + nb_props_per_dict=args.nb_props_per_dict, + ) if __name__ == "__main__": From b90da413681141d4affb8d3b8d7ca7d04020e6c9 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Mon, 26 Feb 2024 19:23:16 -0500 Subject: [PATCH 13/19] Allow to pass raw number of jobs or dicts to benchmark scripts. --- scripts/gen_job_request_benchmark_script.py | 26 ++-- scripts/plot_job_request_benchmark.py | 48 +++++--- scripts/store_huge_fake_data_in_db.py | 127 ++++++-------------- 3 files changed, 85 insertions(+), 116 deletions(-) diff --git a/scripts/gen_job_request_benchmark_script.py b/scripts/gen_job_request_benchmark_script.py index b7621cd8..21aa7f00 100644 --- a/scripts/gen_job_request_benchmark_script.py +++ b/scripts/gen_job_request_benchmark_script.py @@ -1,9 +1,9 @@ import sys -# max: sum(2**i for i in range(n)) jobs -# max: sum(2**i for i in range(n)) dicts -N = 17 -Ks = (1, 10, 100) +# Ns = [i * 10_000 for i in range(16)] +Ns = [i * 10_000 for i in range(11)] +Ks = (1, 500) +N = Ns[-1] NB_REQUESTS = 10 @@ -17,19 +17,19 @@ def main(): print("set -eu") for nb_props_per_dict in Ks: - for nb_dicts in range(N + 1): + for nb_dicts in Ns: gen_commands(N, nb_dicts, nb_props_per_dict, wd) - for nb_jobs in range(N): + for nb_jobs in Ns[:-1]: gen_commands(nb_jobs, 0, 1, wd) for nb_props_per_dict in Ks: - for nb_jobs in range(N): + for nb_jobs in Ns[:-1]: gen_commands(nb_jobs, N, nb_props_per_dict, wd) def gen_commands(nb_jobs, nb_dicts, nb_props_per_dict, working_directory): - task_name = f"jobs-{nb_jobs:02}_dicts-{nb_dicts:02}_props-{nb_props_per_dict:02}" + task_name = f"jobs-{nb_jobs:06}_dicts-{nb_dicts:06}_props-{nb_props_per_dict:03}" cmd_fake_data = ( f"python3 scripts/store_huge_fake_data_in_db.py " @@ -43,8 +43,18 @@ def gen_commands(nb_jobs, nb_dicts, nb_props_per_dict, working_directory): f"--nb-requests {NB_REQUESTS} " f"--output {task_name}" ) + print(cmd_fake_data) + print('python3 -m flask run --host="0.0.0.0" &') + print("export SERVER_PID=$!") + print("sleep 1") + print( + '''python3 -c "import urllib.request; print(urllib.request.urlopen('http://127.0.0.1:5000/').getcode())"''' + ) print(cmd_benchmark) + print("kill $SERVER_PID") + print("export SERVER_PID=") + print() if __name__ == "__main__": diff --git a/scripts/plot_job_request_benchmark.py b/scripts/plot_job_request_benchmark.py index 766751b4..551d1e39 100644 --- a/scripts/plot_job_request_benchmark.py +++ b/scripts/plot_job_request_benchmark.py @@ -5,6 +5,7 @@ try: import matplotlib.pyplot as plt + # plt.figure(figure=(10.8, 7.2), dpi=100) except Exception: print( @@ -49,21 +50,23 @@ def main(): nbs_dicts.append(nb_dicts) nbs_props.append(nb_props_per_dict) - assert max(nbs_jobs) == max(nbs_dicts) - N = max(nbs_jobs) + assert sorted(set(nbs_jobs)) == sorted(set(nbs_dicts)) + Ns = sorted(set(nbs_jobs)) Ks = sorted(set(nbs_props)) - _plot_request_time_per_nb_dicts(stats, N, Ks, folder) - _plots_request_time_per_nb_jobs(stats, N, Ks, folder) + _plot_request_time_per_nb_dicts(stats, Ns, Ks, folder) + _plots_request_time_per_nb_jobs(stats, Ns, Ks, folder) + +def 
_plot_request_time_per_nb_dicts(stats: dict, Ns: list, Ks: list, folder: str): + N = max(Ns) -def _plot_request_time_per_nb_dicts(stats: dict, N: int, Ks: list, folder: str): - x_nb_dicts = [_compute_nb_jobs(n) for n in range(N + 1)] + x_nb_dicts = list(Ns) y_time = {nb_props: [] for nb_props in Ks} for nb_props in Ks: print() - for nb_dicts in range(N + 1): + for nb_dicts in Ns: key = (N, nb_dicts, nb_props) average_duration = _debug_average_seconds(key, stats[key]) y_time[nb_props].append(average_duration) @@ -73,9 +76,11 @@ def _plot_request_time_per_nb_dicts(stats: dict, N: int, Ks: list, folder: str): ax.plot( x_nb_dicts, y_time[nb_props], - marker='o', + marker="o", label=f"{_compute_nb_jobs(N)} jobs in DB, {nb_props} prop(s) per dict", ) + _show_points(x_nb_dicts, y_time[nb_props]) + ax.set_title("Request duration per number of job-user dicts") ax.set_xlabel("Number of job-user dicts in DB") ax.set_ylabel("Request duration in seconds") @@ -89,32 +94,39 @@ def _plot_request_time_per_nb_dicts(stats: dict, N: int, Ks: list, folder: str): plt.close(fig) -def _plots_request_time_per_nb_jobs(stats: dict, N: int, Ks: list, folder: str): - x_nb_jobs = [_compute_nb_jobs(n) for n in range(N + 1)] +def _plots_request_time_per_nb_jobs(stats: dict, Ns: list, Ks: list, folder: str): + x_nb_jobs = list(Ns) y_time_0_dicts_1_props = [] y_time_N_dicts = {nb_props: [] for nb_props in Ks} + N = max(Ns) print() - for nb_jobs in range(N + 1): + for nb_jobs in Ns: key = (nb_jobs, 0, 1) average_duration = _debug_average_seconds(key, stats[key]) y_time_0_dicts_1_props.append(average_duration) print() for nb_props in Ks: - for nb_jobs in range(N + 1): + for nb_jobs in Ns: key = (nb_jobs, N, nb_props) average_duration = _debug_average_seconds(key, stats[key]) y_time_N_dicts[nb_props].append(average_duration) fig, ax = plt.subplots() - ax.plot(x_nb_jobs, y_time_0_dicts_1_props, marker='o', label=f"0 job-user dicts in DB") + ax.plot( + x_nb_jobs, y_time_0_dicts_1_props, marker="o", label=f"0 job-user dicts in DB" + ) + _show_points(x_nb_jobs, y_time_0_dicts_1_props) + for nb_props in Ks: ax.plot( x_nb_jobs, y_time_N_dicts[nb_props], - marker='o', + marker="o", label=f"{_compute_nb_jobs(N)} job-user dicts in DB, {nb_props} props per dict", ) + _show_points(x_nb_jobs, y_time_N_dicts[nb_props]) + ax.set_title("Request duration per number of jobs") ax.set_xlabel("Number of jobs in DB") ax.set_ylabel("Request duration in seconds") @@ -126,7 +138,13 @@ def _plots_request_time_per_nb_jobs(stats: dict, N: int, Ks: list, folder: str): def _compute_nb_jobs(n: int): - return sum(2**i for i in range(n)) + return n + + +def _show_points(xs, ys): + # return + for x, y in zip(xs, ys): + plt.text(x, y, f"({x}, {round(y, 2)})") def _debug_average_seconds(key, durations): diff --git a/scripts/store_huge_fake_data_in_db.py b/scripts/store_huge_fake_data_in_db.py index 5f7f4c7d..e5cd698c 100644 --- a/scripts/store_huge_fake_data_in_db.py +++ b/scripts/store_huge_fake_data_in_db.py @@ -335,7 +335,7 @@ } -DEFAULT_NB_JOBS = len(USERS) +DEFAULT_NB_JOBS = 1_000_000 DEFAULT_NB_DICTS = DEFAULT_NB_JOBS DEFAULT_NB_PROPS_PER_DICT = 4 @@ -349,45 +349,37 @@ def _generate_huge_fake_data( job_user_dicts = [] # populate jobs - if nb_jobs: - assert 1 <= nb_jobs <= len(USERS) - nb_jobs_per_user = [2**i for i in range(nb_jobs)] - assert len(nb_jobs_per_user) == nb_jobs - job_id = 0 - for user, nb_user_jobs in zip(USERS[:nb_jobs], nb_jobs_per_user): - for i in range(nb_user_jobs): - job_id += 1 - job_slurm = BASE_JOB_SLURM.copy() - job_cw = 
BASE_JOB_CW.copy() - # edit slurm.job_id - job_slurm["job_id"] = str(job_id) - # edit slurm.name - job_slurm["name"] = f"job_name_{job_id}" - # edit slurm.username - job_slurm["username"] = user["cc_account_username"] - # edit cw.mila_email_username - job_cw["mila_email_username"] = user["mila_email_username"] - jobs.append({"slurm": job_slurm, "cw": job_cw, "user": {}}) - print("Nb. jobs:", job_id) - assert job_id == sum(nb_jobs_per_user) + for i in range(nb_jobs): + user = USERS[i % len(USERS)] + job_id = i + 1 + job_slurm = BASE_JOB_SLURM.copy() + job_cw = BASE_JOB_CW.copy() + # edit slurm.job_id + job_slurm["job_id"] = str(job_id) + # edit slurm.name + job_slurm["name"] = f"job_name_{job_id}" + # edit slurm.username + job_slurm["username"] = user["cc_account_username"] + # edit cw.mila_email_username + job_cw["mila_email_username"] = user["mila_email_username"] + jobs.append({"slurm": job_slurm, "cw": job_cw, "user": {}}) # populate job-user-dicts - if nb_dicts: - real_nb_dicts = sum(2**i for i in range(nb_dicts)) - for i in range(real_nb_dicts): - user_job_dict = { - "user_id": "student00@mila.quebec", - "job_id": i + 1, - "cluster_name": "beluga", - "labels": { - f"prop_{j + 1}_for_job_{i + 1}": f"I am user dict prop {j + 1} for job ID {i + 1}" - for j in range(nb_props_per_dict) - }, - } - job_user_dicts.append(user_job_dict) - print("Nb. dicts:", real_nb_dicts) - print("NB. props per dict:", nb_props_per_dict) - + for i in range(nb_dicts): + user_job_dict = { + "user_id": "student00@mila.quebec", + "job_id": i + 1, + "cluster_name": "beluga", + "labels": { + f"prop_{j + 1}_for_job_{i + 1}": f"I am user dict prop {j + 1} for job ID {i + 1}" + for j in range(nb_props_per_dict) + }, + } + job_user_dicts.append(user_job_dict) + + print( + f"Jobs: {len(jobs)}, dicts: {len(job_user_dicts)}, props per dict: {nb_props_per_dict}" + ) return {"users": USERS, "jobs": jobs, "labels": job_user_dicts} @@ -420,54 +412,12 @@ def populate_fake_data(db_insertion_point, **kwargs): # Anyway clean before inserting db_insertion_point[k].delete_many({}) if k in E and E[k]: - print("Inserting", k) - # Then insert + print(f"Inserting {k}, {len(E[k])} value(s)") db_insertion_point[k].insert_many(E[k]) - # And check count + # Check count assert db_insertion_point[k].count_documents({}) == len(E[k]) print("Inserted", k) - def cleanup_function(): - """ - Each of those kinds of data is identified in a unique way, - and we can use that identifier to clean up. - - For example, when clearing out jobs, we can look at the "job_id" - of the entries that we inserted. - - The point is that we can run a test against the production mongodb on Atlas - and not affect the real data. If we cleared the tables completely, - then we'd be affecting the real data in a bad way. - """ - for e in E["users"]: - db_insertion_point["users"].delete_many( - {"mila_email_username": e["mila_email_username"]} - ) - - for e in E["gpu"]: - db_insertion_point["gpu"].delete_many({"name": e["name"]}) - - for e in E["labels"]: - copy_e = e - copy_e.pop("labels") - db_insertion_point["labels"].delete_many(copy_e) - - for (k, sub, id_field) in [ - ("jobs", "slurm", "job_id"), - ("nodes", "slurm", "name"), - ]: - if k in E: - for e in E[k]: - # This is complicated, but it's just about a way to say something like - # that we want to remove {"slurm.job_id", e["slurm"]["job_id"]}, - # and the weird notation comes from the fact that mongodb filters use dots, - # but not the original python. 
- db_insertion_point[k].delete_many( - {f"{sub}.{id_field}": e[sub][id_field]} - ) - - return cleanup_function - def store_data_in_db(**kwargs): # Open the database and insert the contents. @@ -484,22 +434,13 @@ def main(argv): "--nb-jobs", type=int, default=DEFAULT_NB_JOBS, - help="Number of users for which to add jobs. " - "Control the number of jobs in database by generating " - "2**i jobs for each user i from user <0> to user . " - "If 0, no jobs are added. " - f"Default is {DEFAULT_NB_JOBS}, for all users available, ie. " - f"{sum(2**i for i in range(DEFAULT_NB_JOBS))} total jobs.", + help="Number of jobs to add. May be 0 (no job added).", ) parser.add_argument( "--nb-dicts", type=int, default=DEFAULT_NB_DICTS, - help="Control the number of job-user dicts in database by generating " - "sum(2**i for i in range(nb-dicts)) dictionaries. " - "If 0, no dicts are added. " - f"Default is {DEFAULT_NB_DICTS} to match the maximum number of potential jobs, ie. " - f"{sum(2**i for i in range(DEFAULT_NB_DICTS))} total dicts.", + help="Number of job-user dicts to add. May be 0 (no job added).", ) parser.add_argument( "--nb-props-per-dict", From bb0e6c13e5f9595e68e4a0b32ebc9b1b9a8a26ec Mon Sep 17 00:00:00 2001 From: notoraptor Date: Thu, 29 Feb 2024 13:54:35 -0500 Subject: [PATCH 14/19] Update --- scripts/gen_benchmark_script_students.py | 66 ++++++++++++ scripts/job_request_benchmark.py | 28 ++++- scripts/plot_benchmark_students.py | 124 +++++++++++++++++++++++ scripts/store_huge_fake_data_in_db.py | 109 ++++++++++++++++---- 4 files changed, 303 insertions(+), 24 deletions(-) create mode 100644 scripts/gen_benchmark_script_students.py create mode 100644 scripts/plot_benchmark_students.py diff --git a/scripts/gen_benchmark_script_students.py b/scripts/gen_benchmark_script_students.py new file mode 100644 index 00000000..26535e50 --- /dev/null +++ b/scripts/gen_benchmark_script_students.py @@ -0,0 +1,66 @@ +import sys +import os + +SIZES_STUDENT00 = [0, 10_000, 100_000, 1_000_000, 2_000_000] +SIZES_STUDENT01 = list(range(0, 101, 20)) +NB_PROPS_PER_DICT = 4 + +NB_REQUESTS = 10 + + +def main(): + if len(sys.argv) != 2: + print("Missing output folder name", file=sys.stderr) + exit(1) + + wd = sys.argv[1] + if not os.path.exists(wd): + os.mkdir(wd) + + print("set -eu") + print("export CLOCKWORK_API_KEY='000aaa01'") + print("export CLOCKWORK_EMAIL='student01@mila.quebec'") + print() + + for std_00 in SIZES_STUDENT00: + for std_01 in SIZES_STUDENT01: + gen_commands(std_00, std_01, wd) + + +def gen_commands(nb_jobs_student00, nb_jobs_student01, working_directory): + task_name = f"student00-{nb_jobs_student00:06}_student01-{nb_jobs_student01:06}" + nb_dicts = nb_jobs_student00 + nb_jobs_student01 + nb_props_per_dict = NB_PROPS_PER_DICT + + cmd_fake_data = ( + f"python3 scripts/store_huge_fake_data_in_db.py " + f"-j student00={nb_jobs_student00} " + f"-j student01={nb_jobs_student01} " + f"--nb-dicts {nb_dicts} " + f"--nb-props-per-dict {nb_props_per_dict}" + ) + cmd_benchmark = ( + f"python3 scripts/job_request_benchmark.py " + f"-w {working_directory} " + f'--address "0.0.0.0" ' + f"--port 5000 " + f'--username "student01@mila.quebec" ' + f"--nb-requests {NB_REQUESTS} " + f"--output {task_name}" + ) + + print(cmd_fake_data) + print('python3 -m flask run --host="0.0.0.0" &') + print("export SERVER_PID=$!") + print("sleep 1") + print( + '''python3 -c "import urllib.request; print(urllib.request.urlopen('http://127.0.0.1:5000/').getcode())"''' + ) + print(cmd_benchmark) + print("kill $SERVER_PID") + 
print("export SERVER_PID=") + print() + + +if __name__ == "__main__": + main() diff --git a/scripts/job_request_benchmark.py b/scripts/job_request_benchmark.py index b324b8e7..cb293c58 100644 --- a/scripts/job_request_benchmark.py +++ b/scripts/job_request_benchmark.py @@ -4,7 +4,6 @@ import sys import logging import time -from datetime import datetime from collections import namedtuple import json @@ -108,6 +107,28 @@ def main(): "'address' (str), 'port` (int), 'api_key` (str), 'email' (str)." ), ) + parser.add_argument( + "-w", + "--working-directory", + type=str, + default=".", + help=( + "Working directory. " + "Default is '.'. " + "If `--config` specified, `--working-directory` is ignored " + "and working directory is config folder." + ), + ) + parser.add_argument( + "-u", + "--username", + type=str, + help=( + "Optional email of specific username for which we want to search jobs. " + "By default, no username is specified, and all jobs visible by logged user " + "(using client email an api key) are retrieved." + ), + ) parser.add_argument( "-n", "--nb-requests", @@ -130,9 +151,8 @@ def main(): logger.error(f"No positive time specified for benchmarking, exit.") sys.exit(1) - bench_date = datetime.now() config_path = None - working_directory = "." + working_directory = args.working_directory if args.config: config_path = os.path.abspath(args.config) working_directory = os.path.dirname(config_path) @@ -171,7 +191,7 @@ def main(): output = [] for i in range(args.nb_requests): - cs = client.profile_getting_user_jobs() + cs = client.profile_getting_user_jobs(username=args.username) logger.info( f"[{i + 1}] Sent request for username in {cs.pc_nanoseconds / 1e9} seconds, " f"received {cs.nb_jobs} jobs." diff --git a/scripts/plot_benchmark_students.py b/scripts/plot_benchmark_students.py new file mode 100644 index 00000000..5ae29b2b --- /dev/null +++ b/scripts/plot_benchmark_students.py @@ -0,0 +1,124 @@ +import os +import sys +import json + + +try: + import matplotlib.pyplot as plt + from matplotlib import colors + + # plt.figure(figure=(10.8, 7.2), dpi=100) +except Exception: + print( + "Matplotlib needed. You can install it with `pip install matplotlib`", + file=sys.stderr, + ) + raise + + +def main(): + if len(sys.argv) != 2: + print("Missing stats folder", file=sys.stderr) + sys.exit(1) + + # Get stat files. + folder = sys.argv[1] + stats_file_names = [] + for name in os.listdir(folder): + if name.startswith("student00-") and name.endswith(".json"): + stats_file_names.append(name) + + # Get stat data. 
+ stats = {} + for name in sorted(stats_file_names): + title, extension = name.split(".") + info_student00, info_student01 = title.split("_") + _, nb_jobs_student00 = info_student00.split("-") + _, nb_jobs_student01 = info_student01.split("-") + nb_jobs_student00 = int(nb_jobs_student00) + nb_jobs_student01 = int(nb_jobs_student01) + + with open(os.path.join(folder, name)) as file: + local_stats = json.load(file) + nbs_jobs = {stat["nb_jobs"] for stat in local_stats} + assert len(nbs_jobs) == 1 + assert next(iter(nbs_jobs)) == nb_jobs_student01 + durations = sorted(stat["pc_nanoseconds"] for stat in local_stats) + stats[(nb_jobs_student00, nb_jobs_student01)] = durations + + _plots_request_time_per_nb_jobs(stats, folder) + + +def _plots_request_time_per_nb_jobs(stats: dict, folder: str): + cdict = { + "red": ( + (0.0, 0.0, 0.0), + # (1.0, 0.5, 0.5), + (1.0, 1.0, 0.0), + ), + "green": ( + (0.0, 0.0, 1.0), + # (1.0, 0.5, 0.5), + (1.0, 0.0, 0.0), + ), + "blue": ( + (0.0, 0.0, 0.0), + # (1.0, 0.0, 0.0), + (1.0, 0.0, 0.0), + ), + } + + cmap = colors.LinearSegmentedColormap("custom", cdict) + + student00_to_plot = {} + for (student00, student01), durations in stats.items(): + average_duration = _debug_average_seconds((student00, student01), durations) + student00_to_plot.setdefault(student00, []).append( + (student01, average_duration) + ) + + fig, ax = plt.subplots() + n = len(student00_to_plot) - 1 + for i, student00 in enumerate(sorted(student00_to_plot.keys())): + local_data = student00_to_plot[student00] + xs = [couple[0] for couple in local_data] + ys = [couple[1] for couple in local_data] + print(cmap(i / n)) + ax.plot( + xs, + ys, + marker="o", + label=f"student00: {student00} jobs", + c=cmap(i / n), + ) + # _show_points(xs, ys) + + ax.set_title("Request duration per number of jobs for student01") + ax.set_xlabel("Number of student01's jobs in DB") + ax.set_ylabel("Request duration in seconds") + ax.legend() + plot_path = os.path.join(folder, f"nb_student01_jobs_to_time.jpg") + plt.gcf().set_size_inches(20, 10) + plt.savefig(plot_path, bbox_inches="tight") + plt.close(fig) + + +def _show_points(xs, ys): + # return + for x, y in zip(xs, ys): + plt.text(x, y, f"({x}, {round(y, 2)})") + + +def _debug_average_seconds(key, durations): + sdt00, std01 = key + avg = sum(durations) / (len(durations) * 1e9) + print( + f"student00 {sdt00:02} student01 {std01:02}", + avg, + [d / 1e9 for d in durations], + ) + return avg + + +if __name__ == "__main__": + main() diff --git a/scripts/store_huge_fake_data_in_db.py b/scripts/store_huge_fake_data_in_db.py index e5cd698c..28e8551e 100644 --- a/scripts/store_huge_fake_data_in_db.py +++ b/scripts/store_huge_fake_data_in_db.py @@ -342,32 +342,91 @@ def _generate_huge_fake_data( nb_jobs=DEFAULT_NB_JOBS, + nb_student_jobs=None, nb_dicts=DEFAULT_NB_DICTS, nb_props_per_dict=DEFAULT_NB_PROPS_PER_DICT, ): + student_to_nb_jobs = [] + if nb_student_jobs is not None: + for desc in nb_student_jobs: + student_name, str_nb_student_jobs = desc.split("=") + nb_student_jobs = int(str_nb_student_jobs.strip()) + student_to_nb_jobs.append((student_name.strip(), nb_student_jobs)) + else: + assert nb_jobs >= 0 + jobs = [] - job_user_dicts = [] # populate jobs - for i in range(nb_jobs): - user = USERS[i % len(USERS)] - job_id = i + 1 - job_slurm = BASE_JOB_SLURM.copy() - job_cw = BASE_JOB_CW.copy() - # edit slurm.job_id - job_slurm["job_id"] = str(job_id) - # edit slurm.name - job_slurm["name"] = f"job_name_{job_id}" - # edit slurm.username - job_slurm["username"] = 
user["cc_account_username"] - # edit cw.mila_email_username - job_cw["mila_email_username"] = user["mila_email_username"] - jobs.append({"slurm": job_slurm, "cw": job_cw, "user": {}}) + if student_to_nb_jobs: + user_map = {user["mila_email_username"]: user for user in USERS} + assert len(user_map) == len(USERS) + job_id = 0 + for student_name, nb_student_jobs in student_to_nb_jobs: + student_email = f"{student_name}@mila.quebec" + user = user_map[student_email] + for i in range(nb_student_jobs): + job_id += 1 + jobs.append( + { + "slurm": { + "account": "def-patate-rrg", + "cluster_name": "beluga", + "time_limit": 4320, + "submit_time": 1681680327, + "start_time": 0, + "end_time": 0, + "exit_code": "SUCCESS:0", + "array_job_id": "0", + "array_task_id": "None", + "job_id": str(job_id), + "name": f"job_name_{job_id}", + "nodes": "None assigned", + "partition": "other_fun_partition", + "job_state": "PENDING", + "tres_allocated": {}, + "tres_requested": { + "num_cpus": 80, + "mem": 95000, + "num_nodes": 1, + "billing": 80, + }, + "username": user["cc_account_username"], + "working_directory": "/a809/b333/c569", + }, + "cw": { + "mila_email_username": user["mila_email_username"], + "last_slurm_update": 1686248596.476063, + "last_slurm_update_by_sacct": 1686248596.476063, + }, + "user": {}, + } + ) + + print(f"Student {student_email}: {nb_student_jobs} jobs") + + assert job_id == len(jobs) + else: + for i in range(nb_jobs): + user = USERS[i % len(USERS)] + job_id = i + 1 + job_slurm = BASE_JOB_SLURM.copy() + job_cw = BASE_JOB_CW.copy() + # edit slurm.job_id + job_slurm["job_id"] = str(job_id) + # edit slurm.name + job_slurm["name"] = f"job_name_{job_id}" + # edit slurm.username + job_slurm["username"] = user["cc_account_username"] + # edit cw.mila_email_username + job_cw["mila_email_username"] = user["mila_email_username"] + jobs.append({"slurm": job_slurm, "cw": job_cw, "user": {}}) # populate job-user-dicts - for i in range(nb_dicts): - user_job_dict = { - "user_id": "student00@mila.quebec", + props_editor = "student01@mila.quebec" if nb_student_jobs else "student00@mila.quebec" + job_user_dicts = [ + { + "user_id": props_editor, "job_id": i + 1, "cluster_name": "beluga", "labels": { @@ -375,7 +434,8 @@ def _generate_huge_fake_data( for j in range(nb_props_per_dict) }, } - job_user_dicts.append(user_job_dict) + for i in range(nb_dicts) + ] print( f"Jobs: {len(jobs)}, dicts: {len(job_user_dicts)}, props per dict: {nb_props_per_dict}" @@ -430,7 +490,15 @@ def main(argv): parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter ) - parser.add_argument( + group = parser.add_mutually_exclusive_group() + group.add_argument( + "-j", + "--nb-student-jobs", + action="append", + type=str, + help="Number of job for a specific student, in format: =. Accept multiple declarations. Example: -j student00=100 -j student05=1900", + ) + group.add_argument( "--nb-jobs", type=int, default=DEFAULT_NB_JOBS, @@ -458,6 +526,7 @@ def main(argv): # Store the generated fake data in the database store_data_in_db( nb_jobs=args.nb_jobs, + nb_student_jobs=args.nb_student_jobs, nb_dicts=args.nb_dicts, nb_props_per_dict=args.nb_props_per_dict, ) From 679d1e5c5ef242580897806c169da86aebf7d9a7 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Thu, 29 Feb 2024 15:45:17 -0500 Subject: [PATCH 15/19] Rename collection and related texts from "labels" to "job user props". 
--- clockwork_web/browser_routes/jobs.py | 8 +-- clockwork_web/core/jobs_helper.py | 71 ++++++++++++------------ clockwork_web/core/search_helper.py | 12 ++-- clockwork_web/templates/base.html | 14 ++--- clockwork_web/templates/jobs_search.html | 18 +++--- clockwork_web/templates/settings.html | 4 +- scripts/store_huge_fake_data_in_db.py | 18 +++--- test_common/fake_data.json | 34 ++++++------ test_common/fake_data.py | 30 +++++----- test_common/jobs_test_helpers.py | 4 +- 10 files changed, 109 insertions(+), 104 deletions(-) diff --git a/clockwork_web/browser_routes/jobs.py b/clockwork_web/browser_routes/jobs.py index b3dffbfb..72bbfc18 100644 --- a/clockwork_web/browser_routes/jobs.py +++ b/clockwork_web/browser_routes/jobs.py @@ -101,8 +101,8 @@ def route_search(): - "sort_asc" is an optional integer and used to specify if sorting is ascending (1) or descending (-1). Default is 1. - "job_array" is optional and used to specify the job array in which we are looking for jobs - - "job_label_name" is optional and used to specify the label name associated to jobs we are looking for - - "job_label_content" is optional and used to specify the label value associated to jobs we are looking for + - "user_prop_name" is optional and used to specify the user prop name associated to jobs we are looking for + - "user_prop_content" is optional and used to specify the user prop value associated to jobs we are looking for .. :quickref: list all Slurm job as formatted html """ @@ -166,8 +166,8 @@ def route_search(): "sort_by": query.sort_by, "sort_asc": query.sort_asc, "job_array": query.job_array, - "job_label_name": query.job_label_name, - "job_label_content": query.job_label_content, + "user_prop_name": query.user_prop_name, + "user_prop_content": query.user_prop_content, }, ) diff --git a/clockwork_web/core/jobs_helper.py b/clockwork_web/core/jobs_helper.py index 30f82354..d70d699b 100644 --- a/clockwork_web/core/jobs_helper.py +++ b/clockwork_web/core/jobs_helper.py @@ -158,32 +158,35 @@ def get_filtered_and_paginated_jobs( # on the server because not enough memory was allocated to perform the sorting. LD_jobs = list(mc["jobs"].find(mongodb_filter)) - # Get job labels + # Get job user props if LD_jobs: - label_map = {} - # Collect all labels related to found jobs, - # and store them in a dict with keys (user ID, job ID, cluster_name) - for label in list( - mc["labels"].find( + user_props_map = {} + # Collect all job user props related to found jobs, + # and store them in a dict with keys (mila email username, job ID, cluster_name) + for user_props in list( + mc["job_user_props"].find( combine_all_mongodb_filters( { "job_id": { "$in": [int(job["slurm"]["job_id"]) for job in LD_jobs] }, - "user_id": current_user.mila_email_username, + "mila_email_username": current_user.mila_email_username, } ) ) ): - # Remove MongoDB identifier, as we won't use it. - label.pop("_id") - key = (label["user_id"], label["job_id"], label["cluster_name"]) - assert key not in label_map - label_map[key] = label["labels"] - - if label_map: - # Populate jobs with labels using job's user email, job ID and cluster name - # to find related labels in labels dict. + key = ( + user_props["mila_email_username"], + user_props["job_id"], + user_props["cluster_name"], + ) + assert key not in user_props_map + user_props_map[key] = user_props["props"] + + if user_props_map: + # Populate jobs with user props using + # current user email, job ID and job cluster name + # to find related user props in props map. 
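+            # (keys look like ("student00@mila.quebec", 795002, "mila"))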
for job in LD_jobs: key = ( # job["cw"]["mila_email_username"], @@ -191,8 +194,8 @@ def get_filtered_and_paginated_jobs( int(job["slurm"]["job_id"]), job["slurm"]["cluster_name"], ) - if key in label_map: - job["job_labels"] = label_map[key] + if key in user_props_map: + job["job_user_props"] = user_props_map[key] # Set nbr_total_jobs if want_count: @@ -272,8 +275,8 @@ def get_jobs( sort_by="submit_time", sort_asc=-1, job_array=None, - job_label_name=None, - job_label_content=None, + user_prop_name=None, + user_prop_content=None, ): """ Set up the filters according to the parameters and retrieve the requested jobs from the database. @@ -291,8 +294,8 @@ def get_jobs( sort_asc Whether or not to sort in ascending order (1) or descending order (-1). job_array ID of job array in which we look for jobs. - job_label_name name of label (string) we must find in jobs to look for. - job_label_content content of label (string) we must find in jobs to look for. + user_prop_name name of user prop (string) we must find in jobs to look for. + user_prop_content content of user prop (string) we must find in jobs to look for. Returns: A tuple containing: @@ -300,24 +303,24 @@ def get_jobs( - the total number of jobs corresponding of the filters in the databse, if want_count has been set to True, None otherwise, as second element """ - # If job label is specified, - # get job indices from jobs associated to this label. - if job_label_name is not None and job_label_content is not None: + # If job user prop is specified, + # get job indices from jobs associated to this prop. + if user_prop_name is not None and user_prop_content is not None: mc = get_db() - label_job_ids = [ - str(label["job_id"]) - for label in mc["labels"].find( + props_job_ids = [ + str(user_props["job_id"]) + for user_props in mc["job_user_props"].find( combine_all_mongodb_filters( - {f"labels.{job_label_name}": job_label_content} + {f"props.{user_prop_name}": user_prop_content} ) ) ] if job_ids: - # If job ids where provided, make intersection between given job ids and labelled job ids. - job_ids = list(set(label_job_ids) & set(job_ids)) + # If job ids where provided, make intersection between given job ids and props job ids. + job_ids = list(set(props_job_ids) & set(job_ids)) else: - # Otherwise, just use labelled job ids. - job_ids = label_job_ids + # Otherwise, just use props job ids. 
+ job_ids = props_job_ids # Set up and combine filters filter = get_global_filter( @@ -464,7 +467,7 @@ def get_jobs_properties_list_per_page(): "user", "job_id", "job_array", - "job_labels", + "job_user_props", "job_name", "job_state", "start_time", diff --git a/clockwork_web/core/search_helper.py b/clockwork_web/core/search_helper.py index 54e6a75c..2650c201 100644 --- a/clockwork_web/core/search_helper.py +++ b/clockwork_web/core/search_helper.py @@ -21,8 +21,8 @@ def parse_search_request(user, args, force_pagination=True): want_count = to_boolean(want_count) job_array = args.get("job_array", type=int, default=None) - job_label_name = args.get("job_label_name", type=str, default=None) or None - job_label_content = args.get("job_label_content", type=str, default=None) or None + user_prop_name = args.get("user_prop_name", type=str, default=None) or None + user_prop_content = args.get("user_prop_content", type=str, default=None) or None default_page_number = "1" if force_pagination else None @@ -73,8 +73,8 @@ def parse_search_request(user, args, force_pagination=True): sort_asc=sort_asc, want_count=want_count, job_array=job_array, - job_label_name=job_label_name, - job_label_content=job_label_content, + user_prop_name=user_prop_name, + user_prop_content=user_prop_content, ) ######################### @@ -119,7 +119,7 @@ def search_request(user, args, force_pagination=True): sort_by=query.sort_by, sort_asc=query.sort_asc, job_array=query.job_array, - job_label_name=query.job_label_name, - job_label_content=query.job_label_content, + user_prop_name=query.user_prop_name, + user_prop_content=query.user_prop_content, ) return (query, jobs, nbr_total_jobs) diff --git a/clockwork_web/templates/base.html b/clockwork_web/templates/base.html index 51d8c7d8..8730dcd2 100644 --- a/clockwork_web/templates/base.html +++ b/clockwork_web/templates/base.html @@ -324,11 +324,11 @@

{% endif %} - {% if previous_request_args['job_label_name'] is not none %} - + {% if previous_request_args['user_prop_name'] is not none %} + {% endif %} - {% if previous_request_args['job_label_content'] is not none %} - + {% if previous_request_args['user_prop_content'] is not none %} + {% endif %}
@@ -341,9 +341,9 @@

- Label {{ previous_request_args['job_label_name'] }}: "{{ previous_request_args['job_label_content'] }}"     + {% if previous_request_args['user_prop_name'] is not none and previous_request_args['user_prop_content'] is not none %} + + User prop {{ previous_request_args['user_prop_name'] }}: "{{ previous_request_args['user_prop_content'] }}"     {% endif %} diff --git a/clockwork_web/templates/jobs_search.html b/clockwork_web/templates/jobs_search.html index 2bd20321..f225a7c0 100644 --- a/clockwork_web/templates/jobs_search.html +++ b/clockwork_web/templates/jobs_search.html @@ -101,9 +101,9 @@

JOBS

{% if (web_settings | check_web_settings_column_display(page_name, "job_array")) %} Job array {% endif %} - - {% if (web_settings | check_web_settings_column_display(page_name, "job_labels")) %} - labels + + {% if (web_settings | check_web_settings_column_display(page_name, "job_user_props")) %} + Job-user props {% endif %} {% if (web_settings | check_web_settings_column_display(page_name, "job_name")) %} @@ -197,14 +197,14 @@

JOBS

{% endif %} - - {% if (web_settings | check_web_settings_column_display(page_name, "job_labels")) %} + + {% if (web_settings | check_web_settings_column_display(page_name, "job_user_props")) %} - {% for D_label_name, D_label_content in D_job.get('job_labels', {}).items() %} + {% for D_user_prop_name, D_user_prop_content in D_job.get('job_user_props', {}).items() %}

- - {{ D_label_name }}
- {{ D_label_content }} +
+ {{ D_user_prop_name }}
+ {{ D_user_prop_content }}

{% endfor %} diff --git a/clockwork_web/templates/settings.html b/clockwork_web/templates/settings.html index 8fefe2ed..a9b04f51 100644 --- a/clockwork_web/templates/settings.html +++ b/clockwork_web/templates/settings.html @@ -279,7 +279,7 @@

{{ gettext("User settings %(mila_email_username)s", mila_email_username=curr {{ gettext("User (@mila.quebec)") }} {{ gettext("Job ID") }} {{ gettext("Job array") }} - {{ gettext("Job labels") }} + {{ gettext("Job-user props") }} {{ gettext("Job name [:20]") }} {{ gettext("Job state") }} {{ gettext("Submit time") }} @@ -292,7 +292,7 @@

{{ gettext("User settings %(mila_email_username)s", mila_email_username=curr {% set page_name = "jobs_list" %} - {% for column_name in ["clusters", "user","job_id", "job_array", "job_labels", "job_name", "job_state", "submit_time", "start_time", "end_time", "links"] %} + {% for column_name in ["clusters", "user","job_id", "job_array", "job_user_props", "job_name", "job_state", "submit_time", "start_time", "end_time", "links"] %}
{% if (web_settings | check_web_settings_column_display(page_name, column_name)) %} diff --git a/scripts/store_huge_fake_data_in_db.py b/scripts/store_huge_fake_data_in_db.py index 28e8551e..73fd07e5 100644 --- a/scripts/store_huge_fake_data_in_db.py +++ b/scripts/store_huge_fake_data_in_db.py @@ -423,13 +423,15 @@ def _generate_huge_fake_data( jobs.append({"slurm": job_slurm, "cw": job_cw, "user": {}}) # populate job-user-dicts - props_editor = "student01@mila.quebec" if nb_student_jobs else "student00@mila.quebec" + props_editor = ( + "student01@mila.quebec" if nb_student_jobs else "student00@mila.quebec" + ) job_user_dicts = [ { - "user_id": props_editor, + "mila_email_username": props_editor, "job_id": i + 1, "cluster_name": "beluga", - "labels": { + "props": { f"prop_{j + 1}_for_job_{i + 1}": f"I am user dict prop {j + 1} for job ID {i + 1}" for j in range(nb_props_per_dict) }, @@ -440,7 +442,7 @@ def _generate_huge_fake_data( print( f"Jobs: {len(jobs)}, dicts: {len(job_user_dicts)}, props per dict: {nb_props_per_dict}" ) - return {"users": USERS, "jobs": jobs, "labels": job_user_dicts} + return {"users": USERS, "jobs": jobs, "job_user_props": job_user_dicts} def populate_fake_data(db_insertion_point, **kwargs): @@ -463,12 +465,12 @@ def populate_fake_data(db_insertion_point, **kwargs): [("mila_email_username", 1)], name="users_email_index" ) db_insertion_point["gpu"].create_index([("name", 1)], name="gpu_name") - db_insertion_point["labels"].create_index( - [("user_id", 1), ("job_id", 1), ("cluster_name", 1), ("labels", 1)], - name="job_label_index", + db_insertion_point["job_user_props"].create_index( + [("mila_email_username", 1), ("job_id", 1), ("cluster_name", 1), ("props", 1)], + name="job_user_props_index", ) - for k in ["users", "jobs", "nodes", "gpu", "labels"]: + for k in ["users", "jobs", "nodes", "gpu", "job_user_props"]: # Anyway clean before inserting db_insertion_point[k].delete_many({}) if k in E and E[k]: diff --git a/test_common/fake_data.json b/test_common/fake_data.json index 0ffcbee2..29c18e58 100644 --- a/test_common/fake_data.json +++ b/test_common/fake_data.json @@ -5963,46 +5963,46 @@ "tflops_fp32": 16.31 } ], - "labels": [ + "job_user_props": [ { - "user_id": "student06@mila.quebec", + "mila_email_username": "student00@mila.quebec", "job_id": 795002, "cluster_name": "mila", - "labels": { - "name": "je suis un label 1" + "props": { + "name": "je suis une user prop 1" } }, { - "user_id": "student16@mila.quebec", + "mila_email_username": "student00@mila.quebec", "job_id": 606872, "cluster_name": "mila", - "labels": { - "name": "je suis un label 2" + "props": { + "name": "je suis une user prop 2" } }, { - "user_id": "student15@mila.quebec", + "mila_email_username": "student00@mila.quebec", "job_id": 834395, "cluster_name": "graham", - "labels": { - "name": "je suis un label 3" + "props": { + "name": "je suis une user prop 3" } }, { - "user_id": "student15@mila.quebec", + "mila_email_username": "student00@mila.quebec", "job_id": 154325, "cluster_name": "graham", - "labels": { - "name": "je suis un label 3", - "name2": "je suis un label 4" + "props": { + "name": "je suis une user prop 3", + "name2": "je suis une user prop 4" } }, { - "user_id": "student12@mila.quebec", + "mila_email_username": "student00@mila.quebec", "job_id": 613024, "cluster_name": "graham", - "labels": { - "name": "je suis un label 1" + "props": { + "name": "je suis une user prop 1" } } ] diff --git a/test_common/fake_data.py b/test_common/fake_data.py index 0677e267..7efb55ea 100644 --- 
a/test_common/fake_data.py +++ b/test_common/fake_data.py @@ -21,18 +21,18 @@ def fake_data(): with open(json_file, "r") as f: E = json.load(f) - # Add labels to jobs + # Add user props to jobs for job in E["jobs"]: job_id = int(job["slurm"]["job_id"]) - user_id = job["cw"]["mila_email_username"] + mila_email_username = job["cw"]["mila_email_username"] cluster_name = job["slurm"]["cluster_name"] - for label in E["labels"]: + for user_props in E["job_user_props"]: if ( - label["job_id"] == job_id - and label["user_id"] == user_id - and label["cluster_name"] == cluster_name + user_props["job_id"] == job_id + and user_props["mila_email_username"] == mila_email_username + and user_props["cluster_name"] == cluster_name ): - job["job_labels"] = label["labels"] + job["job_user_props"] = user_props["props"] mutate_some_job_status(E) return E @@ -84,12 +84,12 @@ def populate_fake_data(db_insertion_point, json_file=None, mutate=False): [("mila_email_username", 1)], name="users_email_index" ) db_insertion_point["gpu"].create_index([("name", 1)], name="gpu_name") - db_insertion_point["labels"].create_index( - [("user_id", 1), ("job_id", 1), ("cluster_name", 1), ("labels", 1)], - name="job_label_index", + db_insertion_point["job_user_props"].create_index( + [("mila_email_username", 1), ("job_id", 1), ("cluster_name", 1), ("props", 1)], + name="job_user_props_index", ) - for k in ["users", "jobs", "nodes", "gpu", "labels"]: + for k in ["users", "jobs", "nodes", "gpu", "job_user_props"]: if k in E: for e in E[k]: db_insertion_point[k].insert_one(e) @@ -114,10 +114,10 @@ def cleanup_function(): for e in E["gpu"]: db_insertion_point["gpu"].delete_many({"name": e["name"]}) - for e in E["labels"]: - copy_e = e - copy_e.pop("labels") - db_insertion_point["labels"].delete_many(copy_e) + for e in E["job_user_props"]: + copy_e = e.copy() + copy_e.pop("props") + db_insertion_point["job_user_props"].delete_many(copy_e) for (k, sub, id_field) in [ ("jobs", "slurm", "job_id"), diff --git a/test_common/jobs_test_helpers.py b/test_common/jobs_test_helpers.py index b66ac7af..a1f81872 100644 --- a/test_common/jobs_test_helpers.py +++ b/test_common/jobs_test_helpers.py @@ -34,7 +34,7 @@ def helper_single_job_at_random(fake_data, cluster_name): def validator(D_job): for k1 in original_D_job: - assert k1 in ["slurm", "cw", "user", "job_labels"] + assert k1 in ["slurm", "cw", "user", "job_user_props"] assert D_job[k1] == original_D_job[k1], f"{D_job}\n{original_D_job}" return validator, job_id @@ -164,7 +164,7 @@ def validator(LD_jobs): # compare all the dicts one by one for (D_job, D_original_job) in zip(LD_jobs, LD_original_jobs): for k1 in D_original_job: - assert k1 in ["slurm", "cw", "user", "job_labels"] + assert k1 in ["slurm", "cw", "user", "job_user_props"] assert D_job[k1] == D_original_job[k1] return validator From 85c6017ff1e9c24377639d6d3e12a76092e9f8e3 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Mon, 4 Mar 2024 09:45:56 -0500 Subject: [PATCH 16/19] Update. 
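
Among other things, this revision adds a --disable-index switch so runs can
compare indexed and unindexed MongoDB queries. A quick way to confirm which
plan the server actually picks (a sketch, assuming a local dev instance and
the collection and index names defined below):

    from pymongo import MongoClient

    coll = MongoClient("localhost", 27017)["clockwork"]["job_user_props"]
    plan = coll.find({"mila_email_username": "student01@mila.quebec"}).explain()
    # An "IXSCAN" stage means job_user_props_index is used; "COLLSCAN" means
    # a full collection scan (the --disable-index case).
    print(plan["queryPlanner"]["winningPlan"])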
--- scripts/gen_benchmark_script_students.py | 117 +++++++++++++++-------- scripts/plot_benchmark_students.py | 37 +++++-- scripts/requirements.txt | 1 + scripts/store_huge_fake_data_in_db.py | 75 ++++++++++----- 4 files changed, 157 insertions(+), 73 deletions(-) diff --git a/scripts/gen_benchmark_script_students.py b/scripts/gen_benchmark_script_students.py index 26535e50..d8d05381 100644 --- a/scripts/gen_benchmark_script_students.py +++ b/scripts/gen_benchmark_script_students.py @@ -1,5 +1,7 @@ import sys import os +from datetime import datetime +import argparse SIZES_STUDENT00 = [0, 10_000, 100_000, 1_000_000, 2_000_000] SIZES_STUDENT01 = list(range(0, 101, 20)) @@ -8,59 +10,90 @@ NB_REQUESTS = 10 -def main(): - if len(sys.argv) != 2: - print("Missing output folder name", file=sys.stderr) - exit(1) +def main(argv): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--disable-index", + action="store_true", + help="If specified, will not create MongoDB index when storing fake data.", + ) + args = parser.parse_args(argv[1:]) + print("Generating benchmark script with args:", args, file=sys.stderr) + + bench_date = datetime.now() + bench_basename = "bench_students" + if args.disable_index: + bench_basename += "_noindex" + bench_name = f"{bench_basename}_{bench_date}".replace(" ", "_").replace(":", "-") + assert not os.path.exists(bench_name) + os.mkdir(bench_name) + + script_name = f"{bench_name}.sh" + with open(script_name, "w") as file: + print("set -eu", file=file) + print("export CLOCKWORK_API_KEY='000aaa01'", file=file) + print("export CLOCKWORK_EMAIL='student01@mila.quebec'", file=file) + print(file=file) - wd = sys.argv[1] - if not os.path.exists(wd): - os.mkdir(wd) + for std_00 in SIZES_STUDENT00: + for std_01 in SIZES_STUDENT01: + gen_commands(std_00, std_01, bench_name, args, file) - print("set -eu") - print("export CLOCKWORK_API_KEY='000aaa01'") - print("export CLOCKWORK_EMAIL='student01@mila.quebec'") - print() + print(file=file) + print(f"python3 scripts/plot_benchmark_students.py {bench_name}", file=file) + print(f"tar -cf {bench_name}.tar {bench_name}/", file=file) + print(f"echo Benchmark compressed in: {bench_name}.tar", file=file) - for std_00 in SIZES_STUDENT00: - for std_01 in SIZES_STUDENT01: - gen_commands(std_00, std_01, wd) + print("Benchmark script saved in:", script_name, file=sys.stderr) -def gen_commands(nb_jobs_student00, nb_jobs_student01, working_directory): - task_name = f"student00-{nb_jobs_student00:06}_student01-{nb_jobs_student01:06}" +def gen_commands(nb_jobs_student00, nb_jobs_student01, working_directory, args, file): nb_dicts = nb_jobs_student00 + nb_jobs_student01 - nb_props_per_dict = NB_PROPS_PER_DICT + task_name = ( + f"std00-{nb_jobs_student00:06}_" + f"std01-{nb_jobs_student01:06}_" + f"dicts-{nb_dicts}_" + f"props-{NB_PROPS_PER_DICT}_" + f"index-{0 if args.disable_index else 1}" + ) - cmd_fake_data = ( - f"python3 scripts/store_huge_fake_data_in_db.py " - f"-j student00={nb_jobs_student00} " - f"-j student01={nb_jobs_student01} " - f"--nb-dicts {nb_dicts} " - f"--nb-props-per-dict {nb_props_per_dict}" + print( + ( + f"python3 scripts/store_huge_fake_data_in_db.py " + f"-j student00={nb_jobs_student00} " + f"-j student01={nb_jobs_student01} " + f"--nb-dicts {nb_dicts} " + f"--nb-props-per-dict {NB_PROPS_PER_DICT} " + f"--props-username student01@mila.quebec " + f"{'--disable-index' if args.disable_index else ''}" + ), + file=file, ) - cmd_benchmark = ( - f"python3 
scripts/job_request_benchmark.py " - f"-w {working_directory} " - f'--address "0.0.0.0" ' - f"--port 5000 " - f'--username "student01@mila.quebec" ' - f"--nb-requests {NB_REQUESTS} " - f"--output {task_name}" + print('python3 -m flask run --host="0.0.0.0" &', file=file) + print("export SERVER_PID=$!", file=file) + print("sleep 1", file=file) + print( + '''python3 -c "import urllib.request; print(urllib.request.urlopen('http://127.0.0.1:5000/').getcode())"''', + file=file, ) - - print(cmd_fake_data) - print('python3 -m flask run --host="0.0.0.0" &') - print("export SERVER_PID=$!") - print("sleep 1") print( - '''python3 -c "import urllib.request; print(urllib.request.urlopen('http://127.0.0.1:5000/').getcode())"''' + ( + f"python3 scripts/job_request_benchmark.py " + f"-w {working_directory} " + f'--address "0.0.0.0" ' + f"--port 5000 " + f'--username "student01@mila.quebec" ' + f"--nb-requests {NB_REQUESTS} " + f"--output {task_name}" + ), + file=file, ) - print(cmd_benchmark) - print("kill $SERVER_PID") - print("export SERVER_PID=") - print() + print("kill $SERVER_PID", file=file) + print("export SERVER_PID=", file=file) + print(file=file) if __name__ == "__main__": - main() + main(sys.argv) diff --git a/scripts/plot_benchmark_students.py b/scripts/plot_benchmark_students.py index 5ae29b2b..c39086fe 100644 --- a/scripts/plot_benchmark_students.py +++ b/scripts/plot_benchmark_students.py @@ -25,18 +25,33 @@ def main(): folder = sys.argv[1] stats_file_names = [] for name in os.listdir(folder): - if name.startswith("student00-") and name.endswith(".json"): + if name.startswith("std00-") and name.endswith(".json"): stats_file_names.append(name) # Get stat data. stats = {} + infos_nb_props = set() + infos_index = set() for name in sorted(stats_file_names): title, extension = name.split(".") - info_student00, info_student01 = title.split("_") + ( + info_student00, + info_student01, + info_nb_dicts, + info_nb_props, + info_index, + ) = title.split("_") _, nb_jobs_student00 = info_student00.split("-") _, nb_jobs_student01 = info_student01.split("-") + _, nb_dicts = info_nb_dicts.split("-") + _, nb_props = info_nb_props.split("-") + _, nb_index = info_index.split("-") nb_jobs_student00 = int(nb_jobs_student00) nb_jobs_student01 = int(nb_jobs_student01) + nb_props = int(nb_props) + nb_index = int(nb_index) + infos_nb_props.add(nb_props) + infos_index.add(nb_index) with open(os.path.join(folder, name)) as file: local_stats = json.load(file) @@ -46,10 +61,17 @@ def main(): durations = sorted(stat["pc_nanoseconds"] for stat in local_stats) stats[(nb_jobs_student00, nb_jobs_student01)] = durations - _plots_request_time_per_nb_jobs(stats, folder) + assert len(infos_nb_props) == 1 + assert len(infos_index) == 1 + nb_props = next(iter(infos_nb_props)) + nb_index = next(iter(infos_index)) + output_name = f"nb-student01-jobs-to-time_props-{nb_props}_index-{nb_index}" + _plots_request_time_per_nb_jobs(stats, folder, output_name, nb_props, nb_index) -def _plots_request_time_per_nb_jobs(stats: dict, folder: str): +def _plots_request_time_per_nb_jobs( + stats: dict, folder: str, output_name: str, nb_props: int, has_index: int +): cdict = { "red": ( (0.0, 0.0, 0.0), @@ -93,11 +115,14 @@ def _plots_request_time_per_nb_jobs(stats: dict, folder: str): ) # _show_points(xs, ys) - ax.set_title("Request duration per number of jobs for student01") + ax.set_title( + f"Request duration per number of jobs for student01 ({nb_props} props per dict)" + + (" (no MongoDB index)" if not has_index else "") + ) 
ax.set_xlabel("Number of student01's jobs in DB") ax.set_ylabel("Request duration in seconds") ax.legend() - plot_path = os.path.join(folder, f"nb_student01_jobs_to_time.jpg") + plot_path = os.path.join(folder, f"{output_name}.jpg") plt.gcf().set_size_inches(20, 10) plt.savefig(plot_path, bbox_inches="tight") plt.close(fig) diff --git a/scripts/requirements.txt b/scripts/requirements.txt index 77c7ad90..6af79857 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -9,3 +9,4 @@ MarkupSafe==2.1.3 pyasn1==0.5.0 pymongo==4.5.0 Werkzeug==3.0.1 +matplotlib==3.8.3 diff --git a/scripts/store_huge_fake_data_in_db.py b/scripts/store_huge_fake_data_in_db.py index 73fd07e5..e3791d61 100644 --- a/scripts/store_huge_fake_data_in_db.py +++ b/scripts/store_huge_fake_data_in_db.py @@ -35,7 +35,6 @@ import argparse import sys -from datetime import datetime from clockwork_web.config import register_config from slurm_state.mongo_client import get_mongo_client @@ -345,6 +344,7 @@ def _generate_huge_fake_data( nb_student_jobs=None, nb_dicts=DEFAULT_NB_DICTS, nb_props_per_dict=DEFAULT_NB_PROPS_PER_DICT, + props_username="student00@mila.quebec", ): student_to_nb_jobs = [] if nb_student_jobs is not None: @@ -423,12 +423,9 @@ def _generate_huge_fake_data( jobs.append({"slurm": job_slurm, "cw": job_cw, "user": {}}) # populate job-user-dicts - props_editor = ( - "student01@mila.quebec" if nb_student_jobs else "student00@mila.quebec" - ) job_user_dicts = [ { - "mila_email_username": props_editor, + "mila_email_username": props_username, "job_id": i + 1, "cluster_name": "beluga", "props": { @@ -446,29 +443,41 @@ def _generate_huge_fake_data( def populate_fake_data(db_insertion_point, **kwargs): + disable_index = kwargs.pop("disable_index", False) + print("Generating huge fake data") E = _generate_huge_fake_data(**kwargs) print("Generated huge fake data") - # Create indices. This isn't half as important as when we're - # dealing with large quantities of data, but it's part of the - # set up for the database. - db_insertion_point["jobs"].create_index( - [("slurm.job_id", 1), ("slurm.cluster_name", 1)], - name="job_id_and_cluster_name", - ) - db_insertion_point["nodes"].create_index( - [("slurm.name", 1), ("slurm.cluster_name", 1)], - name="name_and_cluster_name", - ) - db_insertion_point["users"].create_index( - [("mila_email_username", 1)], name="users_email_index" - ) - db_insertion_point["gpu"].create_index([("name", 1)], name="gpu_name") - db_insertion_point["job_user_props"].create_index( - [("mila_email_username", 1), ("job_id", 1), ("cluster_name", 1), ("props", 1)], - name="job_user_props_index", - ) + if not disable_index: + print("Generate MongoDB index.") + # Create indices. This isn't half as important as when we're + # dealing with large quantities of data, but it's part of the + # set up for the database. 
+        db_insertion_point["jobs"].create_index(
+            [
+                ("slurm.job_id", 1),
+                ("slurm.cluster_name", 1),
+                ("cw.mila_email_username", 1),
+            ],
+            name="job_id_and_cluster_name",
+        )
+        db_insertion_point["nodes"].create_index(
+            [("slurm.name", 1), ("slurm.cluster_name", 1)],
+            name="name_and_cluster_name",
+        )
+        db_insertion_point["users"].create_index(
+            [("mila_email_username", 1)], name="users_email_index"
+        )
+        db_insertion_point["gpu"].create_index([("name", 1)], name="gpu_name")
+        db_insertion_point["job_user_props"].create_index(
+            [
+                ("mila_email_username", 1),
+                ("job_id", 1),
+                ("cluster_name", 1),
+            ],
+            name="job_user_props_index",
+        )
 
     for k in ["users", "jobs", "nodes", "gpu", "job_user_props"]:
         # Anyway clean before inserting
         db_insertion_point[k].delete_many({})
@@ -498,7 +507,10 @@ def main(argv):
         "--nb-student-jobs",
         action="append",
         type=str,
-        help="Number of jobs for a specific student, in format: <student_name>=<nb_jobs>. Accept multiple declarations. Example: -j student00=100 -j student05=1900",
+        help=(
+            "Number of jobs for a specific student, in format: <student_name>=<nb_jobs>. "
+            "Accept multiple declarations. Example: -j student00=100 -j student05=1900"
+        ),
     )
     group.add_argument(
         "--nb-jobs",
@@ -518,6 +530,17 @@ def main(argv):
         default=DEFAULT_NB_PROPS_PER_DICT,
         help=f"Number of key-value pairs in each job-user dict.",
     )
+    parser.add_argument(
+        "--props-username",
+        type=str,
+        default="student00@mila.quebec",
+        help="Email of the user who creates job-user dicts.",
+    )
+    parser.add_argument(
+        "--disable-index",
+        action="store_true",
+        help="If specified, will not create MongoDB indexes.",
+    )
     args = parser.parse_args(argv[1:])
     print(args)
@@ -531,6 +554,8 @@ def main(argv):
         nb_student_jobs=args.nb_student_jobs,
         nb_dicts=args.nb_dicts,
         nb_props_per_dict=args.nb_props_per_dict,
+        props_username=args.props_username,
+        disable_index=args.disable_index,
     )

From bad73f5baa0fc0ab78621c7f9bcd24a2778dda84 Mon Sep 17 00:00:00 2001
From: notoraptor
Date: Mon, 4 Mar 2024 14:03:08 -0500
Subject: [PATCH 17/19] Make sure to clean collections and indexes before
 inserting new fake data.

---
 scripts/store_huge_fake_data_in_db.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/scripts/store_huge_fake_data_in_db.py b/scripts/store_huge_fake_data_in_db.py
index e3791d61..00f36051 100644
--- a/scripts/store_huge_fake_data_in_db.py
+++ b/scripts/store_huge_fake_data_in_db.py
@@ -449,6 +449,11 @@ def populate_fake_data(db_insertion_point, **kwargs):
     E = _generate_huge_fake_data(**kwargs)
     print("Generated huge fake data")
 
+    # Drop any collection (and related indexes) before inserting.
+    for k in ["users", "jobs", "nodes", "gpu", "job_user_props"]:
+        db_insertion_point[k].drop()
+        assert not list(db_insertion_point[k].list_indexes())
+
     if not disable_index:
         print("Generate MongoDB index.")
         # Create indices. This isn't half as important as when we're
@@ -479,6 +484,9 @@ def populate_fake_data(db_insertion_point, **kwargs):
             name="job_user_props_index",
         )
 
+        for k in ["users", "jobs", "nodes", "gpu", "job_user_props"]:
+            assert list(db_insertion_point[k].list_indexes())
+
     for k in ["users", "jobs", "nodes", "gpu", "job_user_props"]:
         # Anyway clean before inserting
         db_insertion_point[k].delete_many({})

From 6bad887d628cf6cea4dd3b95f581df212f543800 Mon Sep 17 00:00:00 2001
From: notoraptor
Date: Tue, 5 Mar 2024 11:53:53 -0500
Subject: [PATCH 18/19] Check if current_user is available before getting job
 user props.
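
The helper can be called in contexts where no web user is authenticated (e.g.
from scripts or tests), in which case current_user is falsy and there is no
requester whose props should be fetched. A minimal sketch of the guarded
lookup, reusing the names from this diff and assuming current_user exposes
mila_email_username when logged in:

    def get_user_props_map(mc, LD_jobs, current_user):
        # Return {} for anonymous callers or when no jobs matched.
        if not (LD_jobs and current_user):
            return {}
        # Keys: (mila_email_username, job_id, cluster_name), as in the
        # original comment; values: the stored props dict.
        return {
            (p["mila_email_username"], p["job_id"], p["cluster_name"]): p["props"]
            for p in mc["job_user_props"].find(
                {"mila_email_username": current_user.mila_email_username}
            )
        }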
---
 clockwork_web/core/jobs_helper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clockwork_web/core/jobs_helper.py b/clockwork_web/core/jobs_helper.py
index d70d699b..26831374 100644
--- a/clockwork_web/core/jobs_helper.py
+++ b/clockwork_web/core/jobs_helper.py
@@ -159,7 +159,7 @@ def get_filtered_and_paginated_jobs(
     LD_jobs = list(mc["jobs"].find(mongodb_filter))
 
     # Get job user props
-    if LD_jobs:
+    if LD_jobs and current_user:
         user_props_map = {}
         # Collect all job user props related to found jobs,
         # and store them in a dict with keys (mila email username, job ID, cluster_name)

From a7b2f4f108afd5671b467bb0717e8f97543db96f Mon Sep 17 00:00:00 2001
From: notoraptor
Date: Mon, 18 Mar 2024 09:12:03 -0400
Subject: [PATCH 19/19] server_benchmark_locust: add commented-out code that
 allows using EMAIL as the username for requests, instead of randomly picking
 among server usernames.

---
 scripts/server_benchmark_locust.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/server_benchmark_locust.py b/scripts/server_benchmark_locust.py
index 00eeaf18..501fb245 100644
--- a/scripts/server_benchmark_locust.py
+++ b/scripts/server_benchmark_locust.py
@@ -107,6 +107,7 @@ def __init__(self, *args, **kwargs):
         global NEXT_USER_ID
         super().__init__(*args, **kwargs)
         self.username = USERNAMES[NEXT_USER_ID % len(USERNAMES)]
+        # self.username = EMAIL
         # Move to next username for next user
         NEXT_USER_ID += 1
         print("Username:", NEXT_USER_ID, self.username)
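
A note on the commented-out line above: pinning every simulated user to one
account turns the benchmark from a multi-user into a single-user load pattern.
A standalone sketch of the two selection strategies; reading EMAIL from the
CLOCKWORK_EMAIL environment variable is an assumption for illustration, not
necessarily how the script defines it:

    import os

    USERNAMES = ["student00@mila.quebec", "student01@mila.quebec"]
    # Assumed source for EMAIL; the real script may define it differently.
    EMAIL = os.environ.get("CLOCKWORK_EMAIL", "student01@mila.quebec")

    def pick_username(next_user_id, use_email=False):
        # Mirrors the patched __init__: cycle through usernames, or pin to EMAIL.
        return EMAIL if use_email else USERNAMES[next_user_id % len(USERNAMES)]

    print(pick_username(0))                  # round-robin over USERNAMES
    print(pick_username(0, use_email=True))  # fixed account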