Skip to content

Commit

Permalink
Update.
Browse files Browse the repository at this point in the history
  • Loading branch information
notoraptor committed Mar 4, 2024
1 parent 48adaa3 commit 6c23935
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 73 deletions.
117 changes: 75 additions & 42 deletions scripts/gen_benchmark_script_students.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import sys
import os
from datetime import datetime
import argparse

SIZES_STUDENT00 = [0, 10_000, 100_000, 1_000_000, 2_000_000]
SIZES_STUDENT01 = list(range(0, 101, 20))
Expand All @@ -8,59 +10,90 @@
NB_REQUESTS = 10


def main(argv):
    """Generate a shell script that runs the students benchmark.

    A fresh benchmark folder and a sibling ``<folder>.sh`` script are created
    in the current working directory. The script contains one group of
    commands per (student00, student01) job-count pair, followed by commands
    that plot the results and archive the benchmark folder.

    Args:
        argv: Full command line, program name included (``sys.argv`` style).
            Supports ``--disable-index``, which is forwarded to the fake-data
            script and reflected in the benchmark name.

    Raises:
        FileExistsError: If the benchmark folder already exists.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "--disable-index",
        action="store_true",
        help="If specified, will not create MongoDB index when storing fake data.",
    )
    args = parser.parse_args(argv[1:])
    print("Generating benchmark script with args:", args, file=sys.stderr)

    bench_date = datetime.now()
    bench_basename = "bench_students"
    if args.disable_index:
        bench_basename += "_noindex"
    # Replace spaces and colons from the timestamp so the name contains neither.
    bench_name = f"{bench_basename}_{bench_date}".replace(" ", "_").replace(":", "-")
    # os.mkdir raises FileExistsError by itself if the folder is already there;
    # relying on it (rather than an `assert`) keeps the check active under
    # `python -O` and avoids a check-then-create race.
    os.mkdir(bench_name)

    script_name = f"{bench_name}.sh"
    with open(script_name, "w", encoding="utf-8") as file:
        print("set -eu", file=file)
        print("export CLOCKWORK_API_KEY='000aaa01'", file=file)
        # NOTE(review): the address below looks like a redacted placeholder in
        # this view -- confirm the real value before running the script.
        print("export CLOCKWORK_EMAIL='[email protected]'", file=file)
        print(file=file)

        for std_00 in SIZES_STUDENT00:
            for std_01 in SIZES_STUDENT01:
                gen_commands(std_00, std_01, bench_name, args, file)

        print(file=file)
        print(f"python3 scripts/plot_benchmark_students.py {bench_name}", file=file)
        print(f"tar -cf {bench_name}.tar {bench_name}/", file=file)
        print(f"echo Benchmark compressed in: {bench_name}.tar", file=file)

    print("Benchmark script saved in:", script_name, file=sys.stderr)


def gen_commands(nb_jobs_student00, nb_jobs_student01, working_directory, args, file):
    """Write the shell commands for one benchmark task to ``file``.

    The emitted commands, in order: store fake data in the database, start
    the Flask server in the background, wait one second and probe the server
    root URL, run the job-request benchmark, then kill the server.

    Args:
        nb_jobs_student00: Number of jobs to create for student00 (int).
        nb_jobs_student01: Number of jobs to create for student01 (int).
        working_directory: Folder passed to the benchmark script via ``-w``.
        args: Parsed command-line namespace; only ``disable_index`` is read.
        file: Writable text stream receiving the generated commands.
    """
    import shlex  # local import: only needed to quote generated shell words

    nb_dicts = nb_jobs_student00 + nb_jobs_student01
    task_name = (
        f"std00-{nb_jobs_student00:06}_"
        f"std01-{nb_jobs_student01:06}_"
        f"dicts-{nb_dicts}_"
        f"props-{NB_PROPS_PER_DICT}_"
        f"index-{0 if args.disable_index else 1}"
    )

    # Build the command from parts so that no trailing space is emitted when
    # the optional --disable-index flag is absent.
    store_cmd = [
        "python3 scripts/store_huge_fake_data_in_db.py",
        f"-j student00={nb_jobs_student00}",
        f"-j student01={nb_jobs_student01}",
        f"--nb-dicts {nb_dicts}",
        f"--nb-props-per-dict {NB_PROPS_PER_DICT}",
        # Quoted like --username in the benchmark command below.
        # NOTE(review): the address looks like a redacted placeholder in this
        # view -- confirm the real value.
        '--props-username "[email protected]"',
    ]
    if args.disable_index:
        store_cmd.append("--disable-index")
    print(" ".join(store_cmd), file=file)

    print('python3 -m flask run --host="0.0.0.0" &', file=file)
    print("export SERVER_PID=$!", file=file)
    print("sleep 1", file=file)
    print(
        '''python3 -c "import urllib.request; print(urllib.request.urlopen('http://127.0.0.1:5000/').getcode())"''',
        file=file,
    )

    # shlex.quote leaves plain names untouched and protects any name that
    # contains characters special to the shell.
    benchmark_cmd = [
        "python3 scripts/job_request_benchmark.py",
        f"-w {shlex.quote(str(working_directory))}",
        '--address "0.0.0.0"',
        "--port 5000",
        '--username "[email protected]"',
        f"--nb-requests {NB_REQUESTS}",
        f"--output {shlex.quote(task_name)}",
    ]
    print(" ".join(benchmark_cmd), file=file)

    print("kill $SERVER_PID", file=file)
    print("export SERVER_PID=", file=file)
    print(file=file)


if __name__ == "__main__":
    # main() takes the full command line (program name included) and parses
    # its own options from it.
    main(sys.argv)
37 changes: 31 additions & 6 deletions scripts/plot_benchmark_students.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,33 @@ def main():
folder = sys.argv[1]
stats_file_names = []
for name in os.listdir(folder):
if name.startswith("student00-") and name.endswith(".json"):
if name.startswith("std00-") and name.endswith(".json"):
stats_file_names.append(name)

# Get stat data.
stats = {}
infos_nb_props = set()
infos_index = set()
for name in sorted(stats_file_names):
title, extension = name.split(".")
info_student00, info_student01 = title.split("_")
(
info_student00,
info_student01,
info_nb_dicts,
info_nb_props,
info_index,
) = title.split("_")
_, nb_jobs_student00 = info_student00.split("-")
_, nb_jobs_student01 = info_student01.split("-")
_, nb_dicts = info_nb_dicts.split("-")
_, nb_props = info_nb_props.split("-")
_, nb_index = info_index.split("-")
nb_jobs_student00 = int(nb_jobs_student00)
nb_jobs_student01 = int(nb_jobs_student01)
nb_props = int(nb_props)
nb_index = int(nb_index)
infos_nb_props.add(nb_props)
infos_index.add(nb_index)

with open(os.path.join(folder, name)) as file:
local_stats = json.load(file)
Expand All @@ -46,10 +61,17 @@ def main():
durations = sorted(stat["pc_nanoseconds"] for stat in local_stats)
stats[(nb_jobs_student00, nb_jobs_student01)] = durations

_plots_request_time_per_nb_jobs(stats, folder)
assert len(infos_nb_props) == 1
assert len(infos_index) == 1
nb_props = next(iter(infos_nb_props))
nb_index = next(iter(infos_index))
output_name = f"nb-student01-jobs-to-time_props-{nb_props}_index-{nb_index}"
_plots_request_time_per_nb_jobs(stats, folder, output_name, nb_props, nb_index)


def _plots_request_time_per_nb_jobs(stats: dict, folder: str):
def _plots_request_time_per_nb_jobs(
stats: dict, folder: str, output_name: str, nb_props: int, has_index: int
):
cdict = {
"red": (
(0.0, 0.0, 0.0),
Expand Down Expand Up @@ -93,11 +115,14 @@ def _plots_request_time_per_nb_jobs(stats: dict, folder: str):
)
# _show_points(xs, ys)

ax.set_title("Request duration per number of jobs for student01")
ax.set_title(
f"Request duration per number of jobs for student01 ({nb_props} props per dict)"
+ (" (no MongoDB index)" if not has_index else "")
)
ax.set_xlabel("Number of student01's jobs in DB")
ax.set_ylabel("Request duration in seconds")
ax.legend()
plot_path = os.path.join(folder, f"nb_student01_jobs_to_time.jpg")
plot_path = os.path.join(folder, f"{output_name}.jpg")
plt.gcf().set_size_inches(20, 10)
plt.savefig(plot_path, bbox_inches="tight")
plt.close(fig)
Expand Down
1 change: 1 addition & 0 deletions scripts/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ MarkupSafe==2.1.3
pyasn1==0.5.0
pymongo==4.5.0
Werkzeug==3.0.1
matplotlib==3.8.3
75 changes: 50 additions & 25 deletions scripts/store_huge_fake_data_in_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@

import argparse
import sys
from datetime import datetime

from clockwork_web.config import register_config
from slurm_state.mongo_client import get_mongo_client
Expand Down Expand Up @@ -345,6 +344,7 @@ def _generate_huge_fake_data(
nb_student_jobs=None,
nb_dicts=DEFAULT_NB_DICTS,
nb_props_per_dict=DEFAULT_NB_PROPS_PER_DICT,
props_username="[email protected]",
):
student_to_nb_jobs = []
if nb_student_jobs is not None:
Expand Down Expand Up @@ -423,12 +423,9 @@ def _generate_huge_fake_data(
jobs.append({"slurm": job_slurm, "cw": job_cw, "user": {}})

# populate job-user-dicts
props_editor = (
"[email protected]" if nb_student_jobs else "[email protected]"
)
job_user_dicts = [
{
"mila_email_username": props_editor,
"mila_email_username": props_username,
"job_id": i + 1,
"cluster_name": "beluga",
"props": {
Expand All @@ -446,29 +443,41 @@ def _generate_huge_fake_data(


def populate_fake_data(db_insertion_point, **kwargs):
disable_index = kwargs.pop("disable_index", False)

print("Generating huge fake data")
E = _generate_huge_fake_data(**kwargs)
print("Generated huge fake data")

# Create indices. This isn't half as important as when we're
# dealing with large quantities of data, but it's part of the
# set up for the database.
db_insertion_point["jobs"].create_index(
[("slurm.job_id", 1), ("slurm.cluster_name", 1)],
name="job_id_and_cluster_name",
)
db_insertion_point["nodes"].create_index(
[("slurm.name", 1), ("slurm.cluster_name", 1)],
name="name_and_cluster_name",
)
db_insertion_point["users"].create_index(
[("mila_email_username", 1)], name="users_email_index"
)
db_insertion_point["gpu"].create_index([("name", 1)], name="gpu_name")
db_insertion_point["job_user_props"].create_index(
[("mila_email_username", 1), ("job_id", 1), ("cluster_name", 1), ("props", 1)],
name="job_user_props_index",
)
if not disable_index:
print("Generate MongoDB index.")
# Create indices. This isn't half as important as when we're
# dealing with large quantities of data, but it's part of the
# set up for the database.
db_insertion_point["jobs"].create_index(
[
("slurm.job_id", 1),
("slurm.cluster_name", 1),
("cw.mila_email_username", 1),
],
name="job_id_and_cluster_name",
)
db_insertion_point["nodes"].create_index(
[("slurm.name", 1), ("slurm.cluster_name", 1)],
name="name_and_cluster_name",
)
db_insertion_point["users"].create_index(
[("mila_email_username", 1)], name="users_email_index"
)
db_insertion_point["gpu"].create_index([("name", 1)], name="gpu_name")
db_insertion_point["job_user_props"].create_index(
[
("mila_email_username", 1),
("job_id", 1),
("cluster_name", 1),
],
name="job_user_props_index",
)

for k in ["users", "jobs", "nodes", "gpu", "job_user_props"]:
# Anyway clean before inserting
Expand Down Expand Up @@ -498,7 +507,10 @@ def main(argv):
"--nb-student-jobs",
action="append",
type=str,
help="Number of job for a specific student, in format: <student>=<nb-jobs>. Accept multiple declarations. Example: -j student00=100 -j student05=1900",
help=(
"Number of job for a specific student, in format: <student>=<nb-jobs>. "
"Accept multiple declarations. Example: -j student00=100 -j student05=1900"
),
)
group.add_argument(
"--nb-jobs",
Expand All @@ -518,6 +530,17 @@ def main(argv):
default=DEFAULT_NB_PROPS_PER_DICT,
help=f"Number of key-value pairs in each job-user dict.",
)
parser.add_argument(
"--props-username",
type=str,
default="[email protected]",
help="Email of user who creates job-user dicts.",
)
parser.add_argument(
"--disable-index",
action="store_true",
help="If specified, will not create MongoDB index.",
)
args = parser.parse_args(argv[1:])
print(args)

Expand All @@ -531,6 +554,8 @@ def main(argv):
nb_student_jobs=args.nb_student_jobs,
nb_dicts=args.nb_dicts,
nb_props_per_dict=args.nb_props_per_dict,
props_username=args.props_username,
disable_index=args.disable_index,
)


Expand Down

0 comments on commit 6c23935

Please sign in to comment.