Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test util db #3406

Merged
merged 11 commits into from
May 21, 2024
930 changes: 927 additions & 3 deletions qiita_db/support_files/patches/test_db_sql/92.sql

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion qiita_db/test/test_meta_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def _get_daily_stats():
('num_studies_ebi', b'1', r_client.get),
('num_samples_ebi', b'27', r_client.get),
('number_samples_ebi_prep', b'54', r_client.get),
('num_processing_jobs', b'14', r_client.get)
('num_processing_jobs', b'474', r_client.get)
# not testing img/time for simplicity
# ('img', r_client.get),
# ('time', r_client.get)
Expand Down
13 changes: 10 additions & 3 deletions qiita_db/test/test_software.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,13 +494,20 @@ def test_processing_jobs(self):
'6ad4d590-4fa3-44d3-9a8f-ddbb472b1b5f',
'063e553b-327c-4818-ab4a-adfe58e49860',
'ac653cb5-76a6-4a45-929e-eb9b2dee6b63']
exp = [qdb.processing_job.ProcessingJob(j) for j in exp_jids]
self.assertCountEqual(qdb.software.Command(1).processing_jobs, exp)

jobs = qdb.software.Command(1).processing_jobs
set_jobs = set(jobs)

# comparing the length of jobs and set_jobs, since there could've been
# duplicates in the tests
self.assertEqual(len(jobs), len(set_jobs))

exp = set([qdb.processing_job.ProcessingJob(j) for j in exp_jids])
self.assertEqual(len(set_jobs & exp), len(exp_jids))

exp_jids = ['bcc7ebcd-39c1-43e4-af2d-822e3589f14d']
exp = [qdb.processing_job.ProcessingJob(j) for j in exp_jids]
self.assertCountEqual(qdb.software.Command(2).processing_jobs, exp)

self.assertCountEqual(qdb.software.Command(4).processing_jobs, [])


Expand Down
7 changes: 0 additions & 7 deletions qiita_db/test/test_user.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,17 +491,10 @@ def test_jobs(self):
limit=1, ignore_status=ignore_status), [
PJ('b72369f9-a886-4193-8d3d-f7b504168e75')])

# no jobs
charles-cowart marked this conversation as resolved.
Show resolved Hide resolved
self.assertEqual(qdb.user.User('[email protected]').jobs(
ignore_status=ignore_status), [])

# generates expected jobs
jobs = qdb.user.User('[email protected]').jobs()
self.assertEqual(jobs, [])

# no jobs
charles-cowart marked this conversation as resolved.
Show resolved Hide resolved
self.assertEqual(qdb.user.User('[email protected]').jobs(), [])

def test_update_email(self):
user = qdb.user.User('[email protected]')
with self.assertRaisesRegex(IncorrectEmailError, 'Bad email given:'):
Expand Down
22 changes: 13 additions & 9 deletions qiita_db/test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -1309,19 +1309,23 @@ def test_quick_mounts_purge(self):

class ResourceAllocationPlotTests(TestCase):
def setUp(self):

self.PATH_TO_DATA = ('./qiita_db/test/test_data/'
'jobs_2024-02-21.tsv.gz')
self.CNAME = "Validate"
self.SNAME = "Diversity types - alpha_vector"
self.CNAME = "Split libraries FASTQ"
self.SNAME = "QIIMEq2"
self.col_name = 'samples * columns'
self.df = pd.read_csv(self.PATH_TO_DATA, sep='\t',
dtype={'extra_info': str})
self.columns = [
"sName", "sVersion", "cID", "cName", "processing_job_id",
"parameters", "samples", "columns", "input_size", "extra_info",
"MaxRSSRaw", "ElapsedRaw"]

# df is a dataframe that represents a table with columns specified in
# self.columns
self.df = qdb.util._retrieve_resource_data(
charles-cowart marked this conversation as resolved.
Show resolved Hide resolved
self.CNAME, self.SNAME, self.columns)

def test_plot_return(self):
# check the plot returns correct objects
fig1, axs1 = qdb.util.resource_allocation_plot(
self.PATH_TO_DATA, self.CNAME, self.SNAME, self.col_name)
self.df, self.CNAME, self.SNAME, self.col_name)
self.assertIsInstance(
fig1, Figure,
"Returned object fig1 is not a Matplotlib Figure")
Expand All @@ -1346,7 +1350,7 @@ def test_minimize_const(self):
failures_df = qdb.util._resource_allocation_failures(
self.df, k, a, b, bm, self.col_name, 'MaxRSSRaw')
failures = failures_df.shape[0]
self.assertEqual(bm, qdb.util.mem_model4, msg="""Best memory model
self.assertEqual(bm, qdb.util.mem_model3, msg="""Best memory model
doesn't match""")
self.assertEqual(failures, 0, "Number of failures must be 0")

Expand Down
42 changes: 37 additions & 5 deletions qiita_db/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,10 @@
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import pandas as pd
from datetime import timedelta
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.optimize import minimize

# memory constant functions defined for @resource_allocation_plot
Expand Down Expand Up @@ -2341,7 +2341,7 @@ def send_email(to, subject, body):
smtp.close()


def resource_allocation_plot(file, cname, sname, col_name):
def resource_allocation_plot(df, cname, sname, col_name):
"""Builds resource allocation plot for given filename and jobs

Parameters
Expand All @@ -2361,9 +2361,6 @@ def resource_allocation_plot(file, cname, sname, col_name):
Returns a matplotlib object with a plot
"""

df = pd.read_csv(file, sep='\t', dtype={'extra_info': str})
df['ElapsedRawTime'] = pd.to_timedelta(df.ElapsedRawTime)
df = df[(df.cName == cname) & (df.sName == sname)]
df.dropna(subset=['samples', 'columns'], inplace=True)
df[col_name] = df.samples * df['columns']
df[col_name] = df[col_name].astype(int)
Expand All @@ -2383,6 +2380,41 @@ def resource_allocation_plot(file, cname, sname, col_name):
return fig, axs


def _retrieve_resource_data(cname, sname, columns):
    """Fetches the resource usage records of a command as a DataFrame

    Parameters
    ----------
    cname : str
        The command name; compared against qiita.software_command.name
    sname : str
        The software name; compared against qiita.software.name
    columns : list of str
        The labels assigned to the columns of the returned DataFrame,
        listed in the same order as the fields of the SELECT clause

    Returns
    -------
    pandas.DataFrame
        One row per record returned by the query, labeled with `columns`
    """
    # the query text is constant, so it is defined before the transaction
    # is opened; cname and sname are bound as query parameters
    query = """
            SELECT
                s.name AS sName,
                s.version AS sVersion,
                sc.command_id AS cID,
                sc.name AS cName,
                pr.processing_job_id AS processing_job_id,
                pr.command_parameters AS parameters,
                sra.samples AS samples,
                sra.columns AS columns,
                sra.input_size AS input_size,
                sra.extra_info AS extra_info,
                sra.memory_used AS memory_used,
                sra.walltime_used AS walltime_used
            FROM
                qiita.processing_job pr
            JOIN
                qiita.software_command sc ON pr.command_id = sc.command_id
            JOIN
                qiita.software s ON sc.software_id = s.software_id
            JOIN
                qiita.slurm_resource_allocations sra
                ON pr.processing_job_id = sra.processing_job_id
            WHERE
                sc.name = %s
                AND s.name = %s;
            """
    with qdb.sql_connection.TRN:
        qdb.sql_connection.TRN.add(query, sql_args=[cname, sname])
        rows = qdb.sql_connection.TRN.execute_fetchindex()
        # the labels come from the caller, not from the SQL aliases
        return pd.DataFrame(rows, columns=columns)


def _resource_allocation_plot_helper(
df, ax, cname, sname, curr, models, col_name):
"""Helper function for resource allocation plot. Builds plot for MaxRSSRaw
Expand Down
Loading