diff --git a/clockwork_web/browser_routes/jobs.py b/clockwork_web/browser_routes/jobs.py
index 289c751a..72bbfc18 100644
--- a/clockwork_web/browser_routes/jobs.py
+++ b/clockwork_web/browser_routes/jobs.py
@@ -101,6 +101,8 @@ def route_search():
     - "sort_asc" is an optional integer and used to specify if sorting is ascending (1) or descending (-1). Default is 1.
     - "job_array" is optional and used to specify the job array in which we are looking for jobs
+    - "user_prop_name" is optional and used to specify the name of a user prop associated with the jobs we are looking for
+    - "user_prop_content" is optional and used to specify the value of that user prop

     .. :quickref: list all Slurm jobs as formatted html
     """
@@ -164,6 +166,8 @@ def route_search():
             "sort_by": query.sort_by,
             "sort_asc": query.sort_asc,
             "job_array": query.job_array,
+            "user_prop_name": query.user_prop_name,
+            "user_prop_content": query.user_prop_content,
         },
     )

diff --git a/clockwork_web/core/jobs_helper.py b/clockwork_web/core/jobs_helper.py
index a9ef4c72..26831374 100644
--- a/clockwork_web/core/jobs_helper.py
+++ b/clockwork_web/core/jobs_helper.py
@@ -7,6 +7,7 @@ import time

 from flask.globals import current_app
+from flask_login import current_user

 from ..db import get_db

@@ -157,6 +158,45 @@ def get_filtered_and_paginated_jobs(
         # on the server because not enough memory was allocated to perform the sorting.
         LD_jobs = list(mc["jobs"].find(mongodb_filter))

+    # Get job user props
+    if LD_jobs and current_user:
+        user_props_map = {}
+        # Collect all job user props related to the jobs found,
+        # and store them in a dict keyed by (mila email username, job ID, cluster name).
+        for user_props in list(
+            mc["job_user_props"].find(
+                combine_all_mongodb_filters(
+                    {
+                        "job_id": {
+                            "$in": [int(job["slurm"]["job_id"]) for job in LD_jobs]
+                        },
+                        "mila_email_username": current_user.mila_email_username,
+                    }
+                )
+            )
+        ):
+            key = (
+                user_props["mila_email_username"],
+                user_props["job_id"],
+                user_props["cluster_name"],
+            )
+            assert key not in user_props_map
+            user_props_map[key] = user_props["props"]
+
+        if user_props_map:
+            # Populate the jobs with user props, using
+            # the current user email, job ID and job cluster name
+            # to find the related user props in the props map.
+            for job in LD_jobs:
+                key = (
+                    # Use the current user's email rather than the job owner's.
+                    current_user.mila_email_username,
+                    int(job["slurm"]["job_id"]),
+                    job["slurm"]["cluster_name"],
+                )
+                if key in user_props_map:
+                    job["job_user_props"] = user_props_map[key]
+
     # Set nbr_total_jobs
     if want_count:
         # Get the number of filtered jobs (not paginated)
@@ -235,6 +275,8 @@ def get_jobs(
     sort_by="submit_time",
     sort_asc=-1,
     job_array=None,
+    user_prop_name=None,
+    user_prop_content=None,
 ):
     """
     Set up the filters according to the parameters and retrieve the requested jobs from the database.
@@ -252,6 +294,8 @@ def get_jobs(
        sort_asc            Whether to sort in ascending (1) or descending (-1) order.
        job_array           ID of the job array in which we look for jobs.
+       user_prop_name      Name of a user prop (string) that the jobs we look for must have.
+       user_prop_content   Value of that user prop (string).

    Returns:
        A tuple containing:
@@ -259,6 +303,24 @@ def get_jobs(
        - the list of jobs as first element
        - the total number of jobs matching the filters in the database, if want_count has been set to True, None otherwise, as second element
    """
+    # If a job user prop is specified,
+    # get the IDs of the jobs associated with this prop.
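+    # (Illustration, values assumed: with user_prop_name="name" and
+    # user_prop_content="hello", the filter below becomes {"props.name": "hello"},
+    # i.e. a dotted path into the props dict stored in the job_user_props collection.)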
+    if user_prop_name is not None and user_prop_content is not None:
+        mc = get_db()
+        props_job_ids = [
+            str(user_props["job_id"])
+            for user_props in mc["job_user_props"].find(
+                combine_all_mongodb_filters(
+                    {f"props.{user_prop_name}": user_prop_content}
+                )
+            )
+        ]
+        if job_ids:
+            # If job ids were provided, take the intersection of the given job ids and the props job ids.
+            job_ids = list(set(props_job_ids) & set(job_ids))
+        else:
+            # Otherwise, just use the props job ids.
+            job_ids = props_job_ids

     # Set up and combine filters
     filter = get_global_filter(
@@ -405,6 +467,7 @@ def get_jobs_properties_list_per_page():
         "user",
         "job_id",
         "job_array",
+        "job_user_props",
         "job_name",
         "job_state",
         "start_time",
diff --git a/clockwork_web/core/search_helper.py b/clockwork_web/core/search_helper.py
index 8d80b33e..2650c201 100644
--- a/clockwork_web/core/search_helper.py
+++ b/clockwork_web/core/search_helper.py
@@ -21,6 +21,8 @@ def parse_search_request(user, args, force_pagination=True):
         want_count = to_boolean(want_count)

     job_array = args.get("job_array", type=int, default=None)
+    user_prop_name = args.get("user_prop_name", type=str, default=None) or None
+    user_prop_content = args.get("user_prop_content", type=str, default=None) or None

     default_page_number = "1" if force_pagination else None

@@ -71,6 +73,8 @@ def parse_search_request(user, args, force_pagination=True):
         sort_asc=sort_asc,
         want_count=want_count,
         job_array=job_array,
+        user_prop_name=user_prop_name,
+        user_prop_content=user_prop_content,
     )

 #########################
@@ -115,5 +119,7 @@ def search_request(user, args, force_pagination=True):
         sort_by=query.sort_by,
         sort_asc=query.sort_asc,
         job_array=query.job_array,
+        user_prop_name=query.user_prop_name,
+        user_prop_content=query.user_prop_content,
     )
     return (query, jobs, nbr_total_jobs)
diff --git a/clockwork_web/core/users_helper.py b/clockwork_web/core/users_helper.py
index 86862e64..30ce7265 100644
--- a/clockwork_web/core/users_helper.py
+++ b/clockwork_web/core/users_helper.py
@@ -592,19 +592,30 @@ def render_template_with_user_settings(template_name_or_list, **context):
     # Get cluster status (if jobs are old and cluster has error).
     for cluster_name in context["clusters"]:
-        # Default status values.
-        jobs_are_old = False
+        # Cluster error cannot yet be checked, so
+        # cluster_has_error is always False for now.
         cluster_has_error = False
+        context["clusters"][cluster_name]["status"] = {
+            "jobs_are_old": _jobs_are_old(cluster_name),
+            "cluster_has_error": cluster_has_error,
+        }

-        # Check if jobs are old.
- jobs, _ = get_jobs(cluster_names=[cluster_name]) - job_dates = [ - job["cw"]["last_slurm_update"] - for job in jobs - if "last_slurm_update" in job["cw"] - ] - if job_dates: - most_recent_job_edition = max(job_dates) + return render_template(template_name_or_list, **context) + + +def _jobs_are_old(cluster_name): + jobs_are_old = False + + mongodb_filter = {"slurm.cluster_name": cluster_name} + mc = get_db() + job_with_max_cw_last_slurm_update = list( + mc["jobs"].find(mongodb_filter).sort([("cw.last_slurm_update", -1)]).limit(1) + ) + + if job_with_max_cw_last_slurm_update: + (job,) = job_with_max_cw_last_slurm_update + if "last_slurm_update" in job["cw"]: + most_recent_job_edition = job["cw"]["last_slurm_update"] current_timestamp = datetime.now().timestamp() elapsed_time = timedelta( seconds=current_timestamp - most_recent_job_edition @@ -613,12 +624,4 @@ def render_template_with_user_settings(template_name_or_list, **context): max_delay = timedelta(days=30) jobs_are_old = elapsed_time > max_delay - # Cluster error cannot yet be checked, so - # cluster_has_error is always False for now. - - context["clusters"][cluster_name]["status"] = { - "jobs_are_old": jobs_are_old, - "cluster_has_error": cluster_has_error, - } - - return render_template(template_name_or_list, **context) + return jobs_are_old diff --git a/clockwork_web/templates/base.html b/clockwork_web/templates/base.html index bf79e7ea..8730dcd2 100644 --- a/clockwork_web/templates/base.html +++ b/clockwork_web/templates/base.html @@ -24,7 +24,7 @@ - + @@ -323,6 +323,12 @@

{% if previous_request_args['job_array'] is not none %} + {% endif %} + {% if previous_request_args['user_prop_name'] is not none %} + + {% endif %} + {% if previous_request_args['user_prop_content'] is not none %} + {% endif %}
@@ -334,6 +340,13 @@

{% endif %} + + {% if previous_request_args['user_prop_name'] is not none and previous_request_args['user_prop_content'] is not none %} + + User prop {{ previous_request_args['user_prop_name'] }}: "{{ previous_request_args['user_prop_content'] }}"     + + + {% endif %}

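For illustration (assumed values, not part of the patch): the hidden inputs and the
filter badge added above simply round-trip the new parameters through search URLs.

    # e.g. a browser search request and the resulting MongoDB filter:
    url = "/jobs/search?user_prop_name=name&user_prop_content=hello"
    mongodb_filter = {"props.name": "hello"}  # built by get_jobs() in jobs_helper.py
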
diff --git a/clockwork_web/templates/jobs_search.html b/clockwork_web/templates/jobs_search.html index 53077a6e..f225a7c0 100644 --- a/clockwork_web/templates/jobs_search.html +++ b/clockwork_web/templates/jobs_search.html @@ -101,6 +101,10 @@

JOBS

{% if (web_settings | check_web_settings_column_display(page_name, "job_array")) %} Job array {% endif %} + + {% if (web_settings | check_web_settings_column_display(page_name, "job_user_props")) %} + Job-user props + {% endif %} {% if (web_settings | check_web_settings_column_display(page_name, "job_name")) %} {% set sort_by = "name" %} @@ -193,6 +197,20 @@

JOBS

{% endif %} + + {% if (web_settings | check_web_settings_column_display(page_name, "job_user_props")) %} + + {% for D_user_prop_name, D_user_prop_content in D_job.get('job_user_props', {}).items() %} +

+ + {{ D_user_prop_name }}
+ {{ D_user_prop_content }} +
+

+ {% endfor %} + + {% endif %} + {% if (web_settings | check_web_settings_column_display(page_name, "job_name")) %} {{D_job['slurm'].get("name", "")[0:20]}} diff --git a/clockwork_web/templates/settings.html b/clockwork_web/templates/settings.html index a2d28e3c..a9b04f51 100644 --- a/clockwork_web/templates/settings.html +++ b/clockwork_web/templates/settings.html @@ -279,6 +279,7 @@

{{ gettext("User settings %(mila_email_username)s", mila_email_username=curr {{ gettext("User (@mila.quebec)") }} {{ gettext("Job ID") }} {{ gettext("Job array") }} + {{ gettext("Job-user props") }} {{ gettext("Job name [:20]") }} {{ gettext("Job state") }} {{ gettext("Submit time") }} @@ -291,7 +292,7 @@

{{ gettext("User settings %(mila_email_username)s", mila_email_username=curr {% set page_name = "jobs_list" %} - {% for column_name in ["clusters", "user","job_id", "job_array", "job_name", "job_state", "submit_time", "start_time", "end_time", "links"] %} + {% for column_name in ["clusters", "user","job_id", "job_array", "job_user_props", "job_name", "job_state", "submit_time", "start_time", "end_time", "links"] %}
{% if (web_settings | check_web_settings_column_display(page_name, column_name)) %} diff --git a/scripts/gen_benchmark_script_students.py b/scripts/gen_benchmark_script_students.py new file mode 100644 index 00000000..d8d05381 --- /dev/null +++ b/scripts/gen_benchmark_script_students.py @@ -0,0 +1,99 @@ +import sys +import os +from datetime import datetime +import argparse + +SIZES_STUDENT00 = [0, 10_000, 100_000, 1_000_000, 2_000_000] +SIZES_STUDENT01 = list(range(0, 101, 20)) +NB_PROPS_PER_DICT = 4 + +NB_REQUESTS = 10 + + +def main(argv): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--disable-index", + action="store_true", + help="If specified, will not create MongoDB index when storing fake data.", + ) + args = parser.parse_args(argv[1:]) + print("Generating benchmark script with args:", args, file=sys.stderr) + + bench_date = datetime.now() + bench_basename = "bench_students" + if args.disable_index: + bench_basename += "_noindex" + bench_name = f"{bench_basename}_{bench_date}".replace(" ", "_").replace(":", "-") + assert not os.path.exists(bench_name) + os.mkdir(bench_name) + + script_name = f"{bench_name}.sh" + with open(script_name, "w") as file: + print("set -eu", file=file) + print("export CLOCKWORK_API_KEY='000aaa01'", file=file) + print("export CLOCKWORK_EMAIL='student01@mila.quebec'", file=file) + print(file=file) + + for std_00 in SIZES_STUDENT00: + for std_01 in SIZES_STUDENT01: + gen_commands(std_00, std_01, bench_name, args, file) + + print(file=file) + print(f"python3 scripts/plot_benchmark_students.py {bench_name}", file=file) + print(f"tar -cf {bench_name}.tar {bench_name}/", file=file) + print(f"echo Benchmark compressed in: {bench_name}.tar", file=file) + + print("Benchmark script saved in:", script_name, file=sys.stderr) + + +def gen_commands(nb_jobs_student00, nb_jobs_student01, working_directory, args, file): + nb_dicts = nb_jobs_student00 + nb_jobs_student01 + task_name = ( + f"std00-{nb_jobs_student00:06}_" + f"std01-{nb_jobs_student01:06}_" + f"dicts-{nb_dicts}_" + f"props-{NB_PROPS_PER_DICT}_" + f"index-{0 if args.disable_index else 1}" + ) + + print( + ( + f"python3 scripts/store_huge_fake_data_in_db.py " + f"-j student00={nb_jobs_student00} " + f"-j student01={nb_jobs_student01} " + f"--nb-dicts {nb_dicts} " + f"--nb-props-per-dict {NB_PROPS_PER_DICT} " + f"--props-username student01@mila.quebec " + f"{'--disable-index' if args.disable_index else ''}" + ), + file=file, + ) + print('python3 -m flask run --host="0.0.0.0" &', file=file) + print("export SERVER_PID=$!", file=file) + print("sleep 1", file=file) + print( + '''python3 -c "import urllib.request; print(urllib.request.urlopen('http://127.0.0.1:5000/').getcode())"''', + file=file, + ) + print( + ( + f"python3 scripts/job_request_benchmark.py " + f"-w {working_directory} " + f'--address "0.0.0.0" ' + f"--port 5000 " + f'--username "student01@mila.quebec" ' + f"--nb-requests {NB_REQUESTS} " + f"--output {task_name}" + ), + file=file, + ) + print("kill $SERVER_PID", file=file) + print("export SERVER_PID=", file=file) + print(file=file) + + +if __name__ == "__main__": + main(sys.argv) diff --git a/scripts/gen_job_request_benchmark_script.py b/scripts/gen_job_request_benchmark_script.py new file mode 100644 index 00000000..21aa7f00 --- /dev/null +++ b/scripts/gen_job_request_benchmark_script.py @@ -0,0 +1,61 @@ +import sys + +# Ns = [i * 10_000 for i in range(16)] +Ns = [i * 10_000 for i in range(11)] +Ks = (1, 500) +N = Ns[-1] + 
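+# Assumed reading of the constants above: Ns are the sizes swept for the numbers
+# of jobs and of job-user dicts, Ks are the props-per-dict variants, and N is the
+# largest size, used as the fixed dimension while the other one varies in main().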
+NB_REQUESTS = 10
+
+
+def main():
+    if len(sys.argv) == 2:
+        wd = sys.argv[1]
+    else:
+        wd = "local"
+
+    print("set -eu")
+
+    for nb_props_per_dict in Ks:
+        for nb_dicts in Ns:
+            gen_commands(N, nb_dicts, nb_props_per_dict, wd)
+
+    for nb_jobs in Ns[:-1]:
+        gen_commands(nb_jobs, 0, 1, wd)
+
+    for nb_props_per_dict in Ks:
+        for nb_jobs in Ns[:-1]:
+            gen_commands(nb_jobs, N, nb_props_per_dict, wd)
+
+
+def gen_commands(nb_jobs, nb_dicts, nb_props_per_dict, working_directory):
+    task_name = f"jobs-{nb_jobs:06}_dicts-{nb_dicts:06}_props-{nb_props_per_dict:03}"
+
+    cmd_fake_data = (
+        f"python3 scripts/store_huge_fake_data_in_db.py "
+        f"--nb-jobs {nb_jobs} "
+        f"--nb-dicts {nb_dicts} "
+        f"--nb-props-per-dict {nb_props_per_dict}"
+    )
+    cmd_benchmark = (
+        f"python3 scripts/job_request_benchmark.py "
+        f"--config {working_directory}/config.json "
+        f"--nb-requests {NB_REQUESTS} "
+        f"--output {task_name}"
+    )
+
+    print(cmd_fake_data)
+    print('python3 -m flask run --host="0.0.0.0" &')
+    print("export SERVER_PID=$!")
+    print("sleep 1")
+    print(
+        '''python3 -c "import urllib.request; print(urllib.request.urlopen('http://127.0.0.1:5000/').getcode())"'''
+    )
+    print(cmd_benchmark)
+    print("kill $SERVER_PID")
+    print("export SERVER_PID=")
+    print()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/job_request_benchmark.py b/scripts/job_request_benchmark.py
new file mode 100644
index 00000000..cb293c58
--- /dev/null
+++ b/scripts/job_request_benchmark.py
@@ -0,0 +1,221 @@
+import os
+
+import argparse
+import sys
+import logging
+import time
+from collections import namedtuple
+import json
+
+try:
+    from clockwork_tools.client import ClockworkToolsClient
+except Exception:
+    print(
+        "Clockwork tools needed. You can install it with `cd clockwork_tools` then `pip install -e .`"
+    )
+    raise
+
+
+log_format = "%(levelname)s:%(name)s:%(asctime)s: %(message)s"
+logging.basicConfig(level=logging.INFO, format=log_format)
+
+logger = logging.getLogger("server_benchmark")
+
+
+class CallStat(
+    namedtuple(
+        "CallStat", ("username", "nb_jobs", "pt_start", "pt_end", "pc_start", "pc_end")
+    )
+):
+    """
+    Class to collect stats and times for one request.
+
+    Python provides 2 precision functions for profiling:
+    - time.process_time_ns(): process time only; does not include sleep times.
+    - time.perf_counter_ns(): includes sleep times.
+
+    I made a mistake in previous commits because I measured requests using
+    process_time(). Thus, request times looked very small, as they don't
+    include sleeps, which are used to wait for the server response.
+
+    So, I decided to measure both process time and full (perf_counter) time
+    to check how they differ:
+    - process time is still very small (less than 0.10 seconds)
+      and well approximated by a linear regression w.r.t. the number of jobs.
+    - full time (perf_counter) is much higher, sometimes up to 10 seconds,
+      and far more irregular (badly approximated by a linear regression).
+
+    In practice, I guess the relevant measure is the full time (perf_counter),
+    as it correctly represents how long a user would wait for the response
+    ** if they got all jobs at once, without pagination **.
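+
+    (For instance, a time.sleep(1) while waiting for the server would add about
+    one second to the perf_counter_ns() interval but almost nothing to
+    process_time_ns(), since the process is idle during the sleep.)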
+ """ + + @property + def pt_nanoseconds(self): + """Duration measured with process time.""" + return self.pt_end - self.pt_start + + @property + def pc_nanoseconds(self): + """Duration measured with perf counter (full duration).""" + return self.pc_end - self.pc_start + + def summary(self): + return { + "nb_jobs": self.nb_jobs, + "pc_nanoseconds": self.pc_nanoseconds, + } + + +class BenchmarkClient(ClockworkToolsClient): + """Client with a specific method for profiling.""" + + def profile_getting_user_jobs(self, username: str = None) -> CallStat: + """Profile a request `jobs/list` with given username and return a CallStat.""" + pc_start = time.perf_counter_ns() + pt_start = time.process_time_ns() + jobs = self.jobs_list(username) + pt_end = time.process_time_ns() + pc_end = time.perf_counter_ns() + return CallStat( + username=username, + nb_jobs=len(jobs), + pc_start=pc_start, + pc_end=pc_end, + pt_start=pt_start, + pt_end=pt_end, + ) + + +def main(): + argv = sys.argv + parser = argparse.ArgumentParser( + prog=argv[0], + formatter_class=argparse.RawTextHelpFormatter, + ) + parser.add_argument("-a", "--address", help="Server host.") + parser.add_argument("-p", "--port", type=int, default=443, help="Server port.") + parser.add_argument( + "--config", + type=str, + help=( + "Optional JSON configuration file to use for benchmarking. " + "If not specified, use --address, --port, and OS environment variables for clockwork api key and email. " + "If file exists, ignore --address, --port and OS variables, and read config from file. " + "If file does not exist, create file with config values from --address, --port and OS variables. " + "Configuration file must contain a dictionary with keys " + "'address' (str), 'port` (int), 'api_key` (str), 'email' (str)." + ), + ) + parser.add_argument( + "-w", + "--working-directory", + type=str, + default=".", + help=( + "Working directory. " + "Default is '.'. " + "If `--config` specified, `--working-directory` is ignored " + "and working directory is config folder." + ), + ) + parser.add_argument( + "-u", + "--username", + type=str, + help=( + "Optional email of specific username for which we want to search jobs. " + "By default, no username is specified, and all jobs visible by logged user " + "(using client email an api key) are retrieved." + ), + ) + parser.add_argument( + "-n", + "--nb-requests", + type=int, + default=10, + help="Number of requests to send (default, 10).", + ) + parser.add_argument( + "-o", + "--output", + type=str, + required=True, + help="Benchmark name, used to save stats on disk. " + "Saved in /.json", + ) + args = parser.parse_args(argv[1:]) + print("Arguments:", args) + + if args.nb_requests < 1: + logger.error(f"No positive time specified for benchmarking, exit.") + sys.exit(1) + + config_path = None + working_directory = args.working_directory + if args.config: + config_path = os.path.abspath(args.config) + working_directory = os.path.dirname(config_path) + # Save next log messages into a file. + log_formatter = logging.Formatter(log_format) + log_path = os.path.join(working_directory, f"bench_{args.output}.log") + logger.info(f"Saving log in: {log_path}") + file_handler = logging.FileHandler(log_path) + file_handler.setFormatter(log_formatter) + logger.addHandler(file_handler) + + if config_path and os.path.isfile(config_path): + # Read config file if available. 
+ with open(config_path) as file: + config = json.load(file) + address = config["address"] + port = config["port"] + api_key = config["api_key"] + email = config["email"] + logger.info(f"Loaded config from file: address: {address}, port: {port}") + else: + address = args.address + port = args.port + # API key and email will be retrieved from OS environment in client constructor. + api_key = None + email = None + if not address: + logger.error( + "Either --address or --config (with existing file) is required." + ) + sys.exit(1) + + client = BenchmarkClient( + host=address, port=port, clockwork_api_key=api_key, email=email + ) + + output = [] + for i in range(args.nb_requests): + cs = client.profile_getting_user_jobs(username=args.username) + logger.info( + f"[{i + 1}] Sent request for username in {cs.pc_nanoseconds / 1e9} seconds, " + f"received {cs.nb_jobs} jobs." + ) + output.append(cs.summary()) + + if config_path and not os.path.exists(config_path): + # If args.config is defined, we save config file if args.config does not exist. + config = { + "address": client.host, + "port": client.port, + "api_key": client.clockwork_api_key, + "email": client.email, + } + with open(config_path, "w") as file: + json.dump(config, file) + logger.info(f"Saved config file at: {config_path}") + + output_path = os.path.join(working_directory, f"{args.output}.json") + with open(output_path, "w") as file: + json.dump(output, file) + logger.info(f"Saved stats at: {output_path}") + logger.info("End.") + + +if __name__ == "__main__": + main() diff --git a/scripts/plot_benchmark_students.py b/scripts/plot_benchmark_students.py new file mode 100644 index 00000000..c39086fe --- /dev/null +++ b/scripts/plot_benchmark_students.py @@ -0,0 +1,149 @@ +import os +import sys +import json + + +try: + import matplotlib.pyplot as plt + from matplotlib import colors + + # plt.figure(figure=(10.8, 7.2), dpi=100) +except Exception: + print( + "Matplotlib needed. You can install it with `pip install matplotlib`", + file=sys.stderr, + ) + raise + + +def main(): + if len(sys.argv) != 2: + print("Missing stats folder", file=sys.stderr) + sys.exit(1) + + # Get stat files. + folder = sys.argv[1] + stats_file_names = [] + for name in os.listdir(folder): + if name.startswith("std00-") and name.endswith(".json"): + stats_file_names.append(name) + + # Get stat data. 
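+    # (Illustration: gen_benchmark_script_students.py names each stats file like
+    # "std00-010000_std01-000040_dicts-10040_props-4_index-1.json"; the
+    # split("_") / split("-") calls below take such names apart.)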
+ stats = {} + infos_nb_props = set() + infos_index = set() + for name in sorted(stats_file_names): + title, extension = name.split(".") + ( + info_student00, + info_student01, + info_nb_dicts, + info_nb_props, + info_index, + ) = title.split("_") + _, nb_jobs_student00 = info_student00.split("-") + _, nb_jobs_student01 = info_student01.split("-") + _, nb_dicts = info_nb_dicts.split("-") + _, nb_props = info_nb_props.split("-") + _, nb_index = info_index.split("-") + nb_jobs_student00 = int(nb_jobs_student00) + nb_jobs_student01 = int(nb_jobs_student01) + nb_props = int(nb_props) + nb_index = int(nb_index) + infos_nb_props.add(nb_props) + infos_index.add(nb_index) + + with open(os.path.join(folder, name)) as file: + local_stats = json.load(file) + nbs_jobs = {stat["nb_jobs"] for stat in local_stats} + assert len(nbs_jobs) == 1 + assert next(iter(nbs_jobs)) == nb_jobs_student01 + durations = sorted(stat["pc_nanoseconds"] for stat in local_stats) + stats[(nb_jobs_student00, nb_jobs_student01)] = durations + + assert len(infos_nb_props) == 1 + assert len(infos_index) == 1 + nb_props = next(iter(infos_nb_props)) + nb_index = next(iter(infos_index)) + output_name = f"nb-student01-jobs-to-time_props-{nb_props}_index-{nb_index}" + _plots_request_time_per_nb_jobs(stats, folder, output_name, nb_props, nb_index) + + +def _plots_request_time_per_nb_jobs( + stats: dict, folder: str, output_name: str, nb_props: int, has_index: int +): + cdict = { + "red": ( + (0.0, 0.0, 0.0), + # (1.0, 0.5, 0.5), + (1.0, 1.0, 0.0), + ), + "green": ( + (0.0, 0.0, 1.0), + # (1.0, 0.5, 0.5), + (1.0, 0.0, 0.0), + ), + "blue": ( + (0.0, 0.0, 0.0), + # (1.0, 0.0, 0.0), + (1.0, 0.0, 0.0), + ), + } + + cmap = colors.LinearSegmentedColormap("custom", cdict) + + student00_to_plot = {} + for (student00, student01), durations in stats.items(): + average_duration = _debug_average_seconds((student00, student01), durations) + student00_to_plot.setdefault(student00, []).append( + (student01, average_duration) + ) + + fig, ax = plt.subplots() + n = len(student00_to_plot) - 1 + for i, student00 in enumerate(sorted(student00_to_plot.keys())): + local_data = student00_to_plot[student00] + xs = [couple[0] for couple in local_data] + ys = [couple[1] for couple in local_data] + print(cmap(i / n)) + ax.plot( + xs, + ys, + marker="o", + label=f"student00: {student00} jobs", + c=cmap(i / n), + ) + # _show_points(xs, ys) + + ax.set_title( + f"Request duration per number of jobs for student01 ({nb_props} props per dict)" + + (" (no MongoDB index)" if not has_index else "") + ) + ax.set_xlabel("Number of student01's jobs in DB") + ax.set_ylabel("Request duration in seconds") + ax.legend() + plot_path = os.path.join(folder, f"{output_name}.jpg") + plt.gcf().set_size_inches(20, 10) + plt.savefig(plot_path, bbox_inches="tight") + plt.close(fig) + + +def _show_points(xs, ys): + # return + for x, y in zip(xs, ys): + plt.text(x, y, f"({x}, {round(y, 2)})") + + +def _debug_average_seconds(key, durations): + sdt00, std01 = key + avg = sum(durations) / (len(durations) * 1e9) + print( + f"student00 {sdt00:02} student01 {std01:02}", + avg, + [d / 1e9 for d in durations], + ) + return avg + + +if __name__ == "__main__": + main() diff --git a/scripts/plot_job_request_benchmark.py b/scripts/plot_job_request_benchmark.py new file mode 100644 index 00000000..551d1e39 --- /dev/null +++ b/scripts/plot_job_request_benchmark.py @@ -0,0 +1,162 @@ +import os +import sys +import json + + +try: + import matplotlib.pyplot as plt + + # plt.figure(figure=(10.8, 7.2), 
dpi=100) +except Exception: + print( + "Matplotlib needed. You can install it with `pip install matplotlib`", + file=sys.stderr, + ) + raise + + +def main(): + if len(sys.argv) != 2: + print("Missing stats folder", file=sys.stderr) + sys.exit(1) + + # Get stat files. + folder = sys.argv[1] + stats_file_names = [] + for name in os.listdir(folder): + if name.startswith("jobs-") and name.endswith(".json"): + stats_file_names.append(name) + + # Get stat data. + stats = {} + nbs_jobs = [] + nbs_dicts = [] + nbs_props = [] + for name in sorted(stats_file_names): + title, extension = name.split(".") + jobs_info, dicts_info, props_info = title.split("_") + _, nb_jobs = jobs_info.split("-") + _, nb_dicts = dicts_info.split("-") + _, nb_props_per_dict = props_info.split("-") + nb_jobs = int(nb_jobs) + nb_dicts = int(nb_dicts) + nb_props_per_dict = int(nb_props_per_dict) + with open(os.path.join(folder, name)) as file: + local_stats = json.load(file) + assert len({stat["nb_jobs"] for stat in local_stats}) == 1 + durations = sorted(stat["pc_nanoseconds"] for stat in local_stats) + stats[(nb_jobs, nb_dicts, nb_props_per_dict)] = durations + nbs_jobs.append(nb_jobs) + nbs_dicts.append(nb_dicts) + nbs_props.append(nb_props_per_dict) + + assert sorted(set(nbs_jobs)) == sorted(set(nbs_dicts)) + Ns = sorted(set(nbs_jobs)) + Ks = sorted(set(nbs_props)) + + _plot_request_time_per_nb_dicts(stats, Ns, Ks, folder) + _plots_request_time_per_nb_jobs(stats, Ns, Ks, folder) + + +def _plot_request_time_per_nb_dicts(stats: dict, Ns: list, Ks: list, folder: str): + N = max(Ns) + + x_nb_dicts = list(Ns) + y_time = {nb_props: [] for nb_props in Ks} + + for nb_props in Ks: + print() + for nb_dicts in Ns: + key = (N, nb_dicts, nb_props) + average_duration = _debug_average_seconds(key, stats[key]) + y_time[nb_props].append(average_duration) + + fig, ax = plt.subplots() + for nb_props in Ks: + ax.plot( + x_nb_dicts, + y_time[nb_props], + marker="o", + label=f"{_compute_nb_jobs(N)} jobs in DB, {nb_props} prop(s) per dict", + ) + _show_points(x_nb_dicts, y_time[nb_props]) + + ax.set_title("Request duration per number of job-user dicts") + ax.set_xlabel("Number of job-user dicts in DB") + ax.set_ylabel("Request duration in seconds") + ax.legend() + plot_path = os.path.join( + folder, + f"nb_dicts_to_time_for_{_compute_nb_jobs(N)}_jobs.jpg", + ) + plt.gcf().set_size_inches(20, 10) + plt.savefig(plot_path, bbox_inches="tight") + plt.close(fig) + + +def _plots_request_time_per_nb_jobs(stats: dict, Ns: list, Ks: list, folder: str): + x_nb_jobs = list(Ns) + y_time_0_dicts_1_props = [] + y_time_N_dicts = {nb_props: [] for nb_props in Ks} + N = max(Ns) + + print() + for nb_jobs in Ns: + key = (nb_jobs, 0, 1) + average_duration = _debug_average_seconds(key, stats[key]) + y_time_0_dicts_1_props.append(average_duration) + print() + for nb_props in Ks: + for nb_jobs in Ns: + key = (nb_jobs, N, nb_props) + average_duration = _debug_average_seconds(key, stats[key]) + y_time_N_dicts[nb_props].append(average_duration) + + fig, ax = plt.subplots() + ax.plot( + x_nb_jobs, y_time_0_dicts_1_props, marker="o", label=f"0 job-user dicts in DB" + ) + _show_points(x_nb_jobs, y_time_0_dicts_1_props) + + for nb_props in Ks: + ax.plot( + x_nb_jobs, + y_time_N_dicts[nb_props], + marker="o", + label=f"{_compute_nb_jobs(N)} job-user dicts in DB, {nb_props} props per dict", + ) + _show_points(x_nb_jobs, y_time_N_dicts[nb_props]) + + ax.set_title("Request duration per number of jobs") + ax.set_xlabel("Number of jobs in DB") + ax.set_ylabel("Request duration 
in seconds") + ax.legend() + plot_path = os.path.join(folder, f"nb_jobs_to_time.jpg") + plt.gcf().set_size_inches(20, 10) + plt.savefig(plot_path, bbox_inches="tight") + plt.close(fig) + + +def _compute_nb_jobs(n: int): + return n + + +def _show_points(xs, ys): + # return + for x, y in zip(xs, ys): + plt.text(x, y, f"({x}, {round(y, 2)})") + + +def _debug_average_seconds(key, durations): + nb_jobs, nb_dicts, nb_props = key + avg = sum(durations) / (len(durations) * 1e9) + print( + f"jobs {nb_jobs:02} dicts {nb_dicts:02} props {nb_props:02}", + avg, + [d / 1e9 for d in durations], + ) + return avg + + +if __name__ == "__main__": + main() diff --git a/scripts/requirements.txt b/scripts/requirements.txt index 77c7ad90..6af79857 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -9,3 +9,4 @@ MarkupSafe==2.1.3 pyasn1==0.5.0 pymongo==4.5.0 Werkzeug==3.0.1 +matplotlib==3.8.3 diff --git a/scripts/server_benchmark_locust.py b/scripts/server_benchmark_locust.py index 00eeaf18..501fb245 100644 --- a/scripts/server_benchmark_locust.py +++ b/scripts/server_benchmark_locust.py @@ -107,6 +107,7 @@ def __init__(self, *args, **kwargs): global NEXT_USER_ID super().__init__(*args, **kwargs) self.username = USERNAMES[NEXT_USER_ID % len(USERNAMES)] + # self.username = EMAIL # Move to next username for next user NEXT_USER_ID += 1 print("Username:", NEXT_USER_ID, self.username) diff --git a/scripts/store_huge_fake_data_in_db.py b/scripts/store_huge_fake_data_in_db.py new file mode 100644 index 00000000..00f36051 --- /dev/null +++ b/scripts/store_huge_fake_data_in_db.py @@ -0,0 +1,571 @@ +""" +Variation du temps de requête en fonction du nombre de dictionnaires job-utilisateur +Pour un nombre de jobs fixes = n: + 0 à n dicts de 1 prop chacun + --nb-dicts + 1 à k props pour chacun des n dicts + --nb-props +Variation du temps de requête en fonction du nombre de jobs dans la DB + Avec 0 dicts: 0 à n jobs + --nb-jobs + Avec n dicts de k props: 0 à n jobs + --nb-jobs + +n = 19 +--nb-jobs: 0 à n => 2 ** 0 à 2 ** n +--nb-dicts: 0 à n => 2 ** 0 à 2 ** n +--nb-props: 1 à k + +Paramètres: +--nb-jobs --nb-dicts --nb-props-per-dict +n 0 1 +n ... 1 +n n 1 +n n ... +n n k + +0 0 1 +... 0 1 +n 0 1 +------------VS----------- +0 n k +... 
n k +n n k +""" + +import argparse +import sys + +from clockwork_web.config import register_config +from slurm_state.mongo_client import get_mongo_client +from slurm_state.config import get_config + +USERS = [ + { + "mila_email_username": "student00@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa00", + "mila_cluster_username": "milauser00", + "cc_account_username": "ccuser00", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "en", + }, + }, + { + "mila_email_username": "student01@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa01", + "mila_cluster_username": "milauser01", + "cc_account_username": "ccuser01", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, + { + "mila_email_username": "student02@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa02", + "mila_cluster_username": "milauser02", + "cc_account_username": "ccuser02", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "en", + }, + }, + { + "mila_email_username": "student03@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa03", + "mila_cluster_username": "milauser03", + "cc_account_username": "ccuser03", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, + { + "mila_email_username": "student04@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa04", + "mila_cluster_username": "milauser04", + "cc_account_username": "ccuser04", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "en", + }, + }, + { + "mila_email_username": "student05@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa05", + "mila_cluster_username": "milauser05", + "cc_account_username": "ccuser05", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, + { + "mila_email_username": "student06@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa06", + "mila_cluster_username": "milauser06", + "cc_account_username": None, + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "en", + }, + }, + { + "mila_email_username": "student07@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa07", + "mila_cluster_username": "milauser07", + "cc_account_username": "ccuser07", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, + { + "mila_email_username": "student08@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa08", + "mila_cluster_username": "milauser08", + "cc_account_username": "ccuser08", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "en", + }, + }, + { + "mila_email_username": "student09@mila.quebec", + "status": "disabled", + "clockwork_api_key": "000aaa09", + "mila_cluster_username": "milauser09", + "cc_account_username": "ccuser09", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, + { + "mila_email_username": "student10@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa10", + 
"mila_cluster_username": "milauser10", + "cc_account_username": "ccuser10", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "en", + }, + }, + { + "mila_email_username": "student11@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa11", + "mila_cluster_username": "milauser11", + "cc_account_username": "ccuser11", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, + { + "mila_email_username": "student12@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa12", + "mila_cluster_username": "milauser12", + "cc_account_username": "ccuser12", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "en", + }, + }, + { + "mila_email_username": "student13@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa13", + "mila_cluster_username": "milauser13", + "cc_account_username": "ccuser13", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, + { + "mila_email_username": "student14@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa14", + "mila_cluster_username": "milauser14", + "cc_account_username": "ccuser14", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "en", + }, + }, + { + "mila_email_username": "student15@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa15", + "mila_cluster_username": "milauser15", + "cc_account_username": "ccuser15", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, + { + "mila_email_username": "student16@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa16", + "mila_cluster_username": "milauser16", + "cc_account_username": "ccuser16", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "en", + }, + }, + { + "mila_email_username": "student17@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa17", + "mila_cluster_username": "milauser17", + "cc_account_username": "ccuser17", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, + { + "mila_email_username": "student18@mila.quebec", + "status": "enabled", + "clockwork_api_key": "000aaa18", + "mila_cluster_username": "milauser18", + "cc_account_username": "ccuser18", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "en", + }, + }, + { + "mila_email_username": "student19@mila.quebec", + "status": "disabled", + "clockwork_api_key": "000aaa19", + "mila_cluster_username": "milauser19", + "cc_account_username": "ccuser19", + "cc_account_update_key": None, + "web_settings": { + "nbr_items_per_page": 40, + "dark_mode": False, + "language": "fr", + }, + }, +] +BASE_JOB_SLURM = { + "account": "def-patate-rrg", + "cluster_name": "beluga", + "time_limit": 4320, + "submit_time": 1681680327, + "start_time": 0, + "end_time": 0, + "exit_code": "SUCCESS:0", + "array_job_id": "0", + "array_task_id": "None", + "job_id": "197775", + "name": "somejobname_507716", + "nodes": "None assigned", + "partition": "other_fun_partition", + "job_state": "PENDING", + "tres_allocated": {}, + "tres_requested": 
{ + "num_cpus": 80, + "mem": 95000, + "num_nodes": 1, + "billing": 80, + }, + "username": "ccuser02", + "working_directory": "/a809/b333/c569", +} +BASE_JOB_CW = { + "mila_email_username": "student02@mila.quebec", + "last_slurm_update": 1686248596.476063, + "last_slurm_update_by_sacct": 1686248596.476063, +} + + +DEFAULT_NB_JOBS = 1_000_000 +DEFAULT_NB_DICTS = DEFAULT_NB_JOBS +DEFAULT_NB_PROPS_PER_DICT = 4 + + +def _generate_huge_fake_data( + nb_jobs=DEFAULT_NB_JOBS, + nb_student_jobs=None, + nb_dicts=DEFAULT_NB_DICTS, + nb_props_per_dict=DEFAULT_NB_PROPS_PER_DICT, + props_username="student00@mila.quebec", +): + student_to_nb_jobs = [] + if nb_student_jobs is not None: + for desc in nb_student_jobs: + student_name, str_nb_student_jobs = desc.split("=") + nb_student_jobs = int(str_nb_student_jobs.strip()) + student_to_nb_jobs.append((student_name.strip(), nb_student_jobs)) + else: + assert nb_jobs >= 0 + + jobs = [] + + # populate jobs + if student_to_nb_jobs: + user_map = {user["mila_email_username"]: user for user in USERS} + assert len(user_map) == len(USERS) + job_id = 0 + for student_name, nb_student_jobs in student_to_nb_jobs: + student_email = f"{student_name}@mila.quebec" + user = user_map[student_email] + for i in range(nb_student_jobs): + job_id += 1 + jobs.append( + { + "slurm": { + "account": "def-patate-rrg", + "cluster_name": "beluga", + "time_limit": 4320, + "submit_time": 1681680327, + "start_time": 0, + "end_time": 0, + "exit_code": "SUCCESS:0", + "array_job_id": "0", + "array_task_id": "None", + "job_id": str(job_id), + "name": f"job_name_{job_id}", + "nodes": "None assigned", + "partition": "other_fun_partition", + "job_state": "PENDING", + "tres_allocated": {}, + "tres_requested": { + "num_cpus": 80, + "mem": 95000, + "num_nodes": 1, + "billing": 80, + }, + "username": user["cc_account_username"], + "working_directory": "/a809/b333/c569", + }, + "cw": { + "mila_email_username": user["mila_email_username"], + "last_slurm_update": 1686248596.476063, + "last_slurm_update_by_sacct": 1686248596.476063, + }, + "user": {}, + } + ) + + print(f"Student {student_email}: {nb_student_jobs} jobs") + + assert job_id == len(jobs) + else: + for i in range(nb_jobs): + user = USERS[i % len(USERS)] + job_id = i + 1 + job_slurm = BASE_JOB_SLURM.copy() + job_cw = BASE_JOB_CW.copy() + # edit slurm.job_id + job_slurm["job_id"] = str(job_id) + # edit slurm.name + job_slurm["name"] = f"job_name_{job_id}" + # edit slurm.username + job_slurm["username"] = user["cc_account_username"] + # edit cw.mila_email_username + job_cw["mila_email_username"] = user["mila_email_username"] + jobs.append({"slurm": job_slurm, "cw": job_cw, "user": {}}) + + # populate job-user-dicts + job_user_dicts = [ + { + "mila_email_username": props_username, + "job_id": i + 1, + "cluster_name": "beluga", + "props": { + f"prop_{j + 1}_for_job_{i + 1}": f"I am user dict prop {j + 1} for job ID {i + 1}" + for j in range(nb_props_per_dict) + }, + } + for i in range(nb_dicts) + ] + + print( + f"Jobs: {len(jobs)}, dicts: {len(job_user_dicts)}, props per dict: {nb_props_per_dict}" + ) + return {"users": USERS, "jobs": jobs, "job_user_props": job_user_dicts} + + +def populate_fake_data(db_insertion_point, **kwargs): + disable_index = kwargs.pop("disable_index", False) + + print("Generating huge fake data") + E = _generate_huge_fake_data(**kwargs) + print("Generated huge fake data") + + # Drop any collection (and related index) before. 
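+    # (Note: dropping a MongoDB collection also drops its indexes, so every run
+    # starts from a clean state whether or not --disable-index is passed.)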
+ for k in ["users", "jobs", "nodes", "gpu", "job_user_props"]: + db_insertion_point[k].drop() + assert not list(db_insertion_point[k].list_indexes()) + + if not disable_index: + print("Generate MongoDB index.") + # Create indices. This isn't half as important as when we're + # dealing with large quantities of data, but it's part of the + # set up for the database. + db_insertion_point["jobs"].create_index( + [ + ("slurm.job_id", 1), + ("slurm.cluster_name", 1), + ("cw.mila_email_username", 1), + ], + name="job_id_and_cluster_name", + ) + db_insertion_point["nodes"].create_index( + [("slurm.name", 1), ("slurm.cluster_name", 1)], + name="name_and_cluster_name", + ) + db_insertion_point["users"].create_index( + [("mila_email_username", 1)], name="users_email_index" + ) + db_insertion_point["gpu"].create_index([("name", 1)], name="gpu_name") + db_insertion_point["job_user_props"].create_index( + [ + ("mila_email_username", 1), + ("job_id", 1), + ("cluster_name", 1), + ], + name="job_user_props_index", + ) + + for k in ["users", "jobs", "nodes", "gpu", "job_user_props"]: + assert list(db_insertion_point[k].list_indexes()) + + for k in ["users", "jobs", "nodes", "gpu", "job_user_props"]: + # Anyway clean before inserting + db_insertion_point[k].delete_many({}) + if k in E and E[k]: + print(f"Inserting {k}, {len(E[k])} value(s)") + db_insertion_point[k].insert_many(E[k]) + # Check count + assert db_insertion_point[k].count_documents({}) == len(E[k]) + print("Inserted", k) + + +def store_data_in_db(**kwargs): + # Open the database and insert the contents. + client = get_mongo_client() + populate_fake_data(client[get_config("mongo.database_name")], **kwargs) + + +def main(argv): + # Retrieve the arguments passed to the script + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + group = parser.add_mutually_exclusive_group() + group.add_argument( + "-j", + "--nb-student-jobs", + action="append", + type=str, + help=( + "Number of job for a specific student, in format: =. " + "Accept multiple declarations. Example: -j student00=100 -j student05=1900" + ), + ) + group.add_argument( + "--nb-jobs", + type=int, + default=DEFAULT_NB_JOBS, + help="Number of jobs to add. May be 0 (no job added).", + ) + parser.add_argument( + "--nb-dicts", + type=int, + default=DEFAULT_NB_DICTS, + help="Number of job-user dicts to add. 
May be 0 (no dict added).",
+    )
+    parser.add_argument(
+        "--nb-props-per-dict",
+        type=int,
+        default=DEFAULT_NB_PROPS_PER_DICT,
+        help="Number of key-value pairs in each job-user dict.",
+    )
+    parser.add_argument(
+        "--props-username",
+        type=str,
+        default="student00@mila.quebec",
+        help="Email of the user who creates the job-user dicts.",
+    )
+    parser.add_argument(
+        "--disable-index",
+        action="store_true",
+        help="If specified, will not create MongoDB index.",
+    )
+    args = parser.parse_args(argv[1:])
+    print(args)
+
+    # Register the elements to access the database
+    register_config("mongo.connection_string", "")
+    register_config("mongo.database_name", "clockwork")
+
+    # Store the generated fake data in the database
+    store_data_in_db(
+        nb_jobs=args.nb_jobs,
+        nb_student_jobs=args.nb_student_jobs,
+        nb_dicts=args.nb_dicts,
+        nb_props_per_dict=args.nb_props_per_dict,
+        props_username=args.props_username,
+        disable_index=args.disable_index,
+    )
+
+
+if __name__ == "__main__":
+    main(sys.argv)
diff --git a/test_common/fake_data.json b/test_common/fake_data.json
index b38bb1a2..29c18e58 100644
--- a/test_common/fake_data.json
+++ b/test_common/fake_data.json
@@ -5962,5 +5962,48 @@
             "tensor_cores": 576,
             "tflops_fp32": 16.31
         }
+    ],
+    "job_user_props": [
+        {
+            "mila_email_username": "student00@mila.quebec",
+            "job_id": 795002,
+            "cluster_name": "mila",
+            "props": {
+                "name": "je suis une user prop 1"
+            }
+        },
+        {
+            "mila_email_username": "student00@mila.quebec",
+            "job_id": 606872,
+            "cluster_name": "mila",
+            "props": {
+                "name": "je suis une user prop 2"
+            }
+        },
+        {
+            "mila_email_username": "student00@mila.quebec",
+            "job_id": 834395,
+            "cluster_name": "graham",
+            "props": {
+                "name": "je suis une user prop 3"
+            }
+        },
+        {
+            "mila_email_username": "student00@mila.quebec",
+            "job_id": 154325,
+            "cluster_name": "graham",
+            "props": {
+                "name": "je suis une user prop 3",
+                "name2": "je suis une user prop 4"
+            }
+        },
+        {
+            "mila_email_username": "student00@mila.quebec",
+            "job_id": 613024,
+            "cluster_name": "graham",
+            "props": {
+                "name": "je suis une user prop 1"
+            }
+        }
     ]
-}
\ No newline at end of file
+}
diff --git a/test_common/fake_data.py b/test_common/fake_data.py
index 5061ffc0..7efb55ea 100644
--- a/test_common/fake_data.py
+++ b/test_common/fake_data.py
@@ -20,6 +20,20 @@ def fake_data():
     )
     with open(json_file, "r") as f:
         E = json.load(f)
+
+    # Add user props to jobs
+    for job in E["jobs"]:
+        job_id = int(job["slurm"]["job_id"])
+        mila_email_username = job["cw"]["mila_email_username"]
+        cluster_name = job["slurm"]["cluster_name"]
+        for user_props in E["job_user_props"]:
+            if (
+                user_props["job_id"] == job_id
+                and user_props["mila_email_username"] == mila_email_username
+                and user_props["cluster_name"] == cluster_name
+            ):
+                job["job_user_props"] = user_props["props"]
+
     mutate_some_job_status(E)
     return E
@@ -70,8 +84,12 @@ def populate_fake_data(db_insertion_point, json_file=None, mutate=False):
         [("mila_email_username", 1)], name="users_email_index"
     )
     db_insertion_point["gpu"].create_index([("name", 1)], name="gpu_name")
+    db_insertion_point["job_user_props"].create_index(
+        [("mila_email_username", 1), ("job_id", 1), ("cluster_name", 1), ("props", 1)],
+        name="job_user_props_index",
+    )

-    for k in ["users", "jobs", "nodes", "gpu"]:
+    for k in ["users", "jobs", "nodes", "gpu", "job_user_props"]:
         if k in E:
             for e in E[k]:
                 db_insertion_point[k].insert_one(e)
@@ -96,6 +114,11 @@ def cleanup_function():
         for e in E["gpu"]:
             db_insertion_point["gpu"].delete_many({"name": e["name"]})

+    for e in 
E["job_user_props"]: + copy_e = e.copy() + copy_e.pop("props") + db_insertion_point["job_user_props"].delete_many(copy_e) + for (k, sub, id_field) in [ ("jobs", "slurm", "job_id"), ("nodes", "slurm", "name"), diff --git a/test_common/jobs_test_helpers.py b/test_common/jobs_test_helpers.py index 5804a2ec..a1f81872 100644 --- a/test_common/jobs_test_helpers.py +++ b/test_common/jobs_test_helpers.py @@ -34,11 +34,8 @@ def helper_single_job_at_random(fake_data, cluster_name): def validator(D_job): for k1 in original_D_job: - assert k1 in ["slurm", "cw", "user"] - for k2 in original_D_job[k1]: - assert ( - D_job[k1][k2] == original_D_job[k1][k2] - ), f"{D_job}\n{original_D_job}" + assert k1 in ["slurm", "cw", "user", "job_user_props"] + assert D_job[k1] == original_D_job[k1], f"{D_job}\n{original_D_job}" return validator, job_id @@ -167,8 +164,7 @@ def validator(LD_jobs): # compare all the dicts one by one for (D_job, D_original_job) in zip(LD_jobs, LD_original_jobs): for k1 in D_original_job: - assert k1 in ["slurm", "cw", "user"] - for k2 in D_original_job[k1]: - assert D_job[k1][k2] == D_original_job[k1][k2] + assert k1 in ["slurm", "cw", "user", "job_user_props"] + assert D_job[k1] == D_original_job[k1] return validator