Operator to generate inputs for sample sheets #848

Open
wants to merge 15 commits into base: develop
2 changes: 2 additions & 0 deletions beagle/settings.py
@@ -322,6 +322,8 @@

RIDGEBACK_URL = os.environ.get('BEAGLE_RIDGEBACK_URL', 'http://localhost:5003')

MPATH_URL = os.environ.get('BEAGLE_MPATH_URL', 'http://localhost:7331')

LOG_PATH = os.environ.get('BEAGLE_LOG_PATH', 'beagle-server.log')

LOGGING = {
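The new MPATH_URL setting follows the same env-override pattern as the other service URLs in settings.py. A quick sanity check from a plain Python shell (standard library only; the endpoint paths are the ones the MPath submitter later in this PR posts to):

import os

# Same lookup as the new settings.py line: environment override with a
# localhost default.
mpath_url = os.environ.get('BEAGLE_MPATH_URL', 'http://localhost:7331')

# The MPath submitter added in this PR posts to these endpoints.
print(mpath_url + "/ngs/projects")
print(mpath_url + "/ngs/")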
12 changes: 12 additions & 0 deletions beagle_etl/fixtures/beagle_etl.operator.json
@@ -119,5 +119,17 @@
"version": "v2.0.0",
"slug": "AccessQCOperator"
}
},
{
"model": "beagle_etl.operator",
"pk": 12,
"fields": {
"active": true,
"recipes": "[\"None\"]",
"class_name": "runner.operator.access.v1_0_0.sample_sheet.AccessSampleSheetOperator",
"version": "v1.0.0",
"slug": "AccessSampleSheetOperator"
}
}

]
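If the operator registry needs to be refreshed manually, Django's standard loaddata command can apply this fixture; whether the deployment actually applies fixtures this way is an assumption, so treat the snippet as a sketch:

from django.core.management import call_command

# Reload the operator fixture so the new AccessSampleSheetOperator entry (pk 12)
# is present; assumes fixtures are applied with Django's built-in loaddata.
call_command("loaddata", "beagle_etl/fixtures/beagle_etl.operator.json")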
10 changes: 10 additions & 0 deletions fixtures/tests/10075_D.filemetadata.json
@@ -18,6 +18,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_5",
"cmoSampleName": "s_C_0CREWW_L013_d",
"barcodeId": "DUAL_IDT_LIB_267",
"libraryId": "10075_D_5_1_1_1",
"patientId": "C-8VK0V7",
@@ -93,6 +94,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_4",
"cmoSampleName": "s_C_0CREWW_L012_d",
"barcodeId": "DUAL_IDT_LIB_255",
"libraryId": "10075_D_4_1_1_1",
"patientId": "C-DRKHP7",
@@ -168,6 +170,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_2",
"cmoSampleName": "s_C_0CREWW_L011_d",
"barcodeId": null,
"libraryId": "10075_D_2",
"patientId": "C-DRKHP7",
@@ -244,6 +247,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_3",
"cmoSampleName": "s_C_0CREWW_L010_d",
"barcodeId": null,
"libraryId": "10075_D_3",
"patientId": "C-8VK0V7",
@@ -320,6 +324,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_4",
"cmoSampleName": "s_C_0CREWW_L010_d",
"barcodeId": "DUAL_IDT_LIB_255",
"libraryId": null,
"patientId": "C-DRKHP7",
@@ -395,6 +400,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_2",
"cmoSampleName": "s_C_0CREWW_L009_d",
"barcodeId": null,
"libraryId": null,
"patientId": "C-DRKHP7",
@@ -471,6 +477,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_3",
"cmoSampleName": "s_C_0CREWW_L008_d",
"barcodeId": null,
"libraryId": null,
"patientId": "C-8VK0V7",
@@ -547,6 +554,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_5",
"cmoSampleName": "s_C_0CREWW_L007_d",
"barcodeId": "DUAL_IDT_LIB_267",
"libraryId": null,
"patientId": "C-8VK0V7",
@@ -622,6 +630,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_1",
"cmoSampleName": "s_C_0CREWW_L006_d",
"barcodeId": "DUAL_IDT_LIB_243",
"libraryId": "10075_D_1_1_1_1",
"patientId": "C-DRKHP7",
@@ -697,6 +706,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_1",
"cmoSampleName": "s_C_0CREWW_L005_d",
"barcodeId": "DUAL_IDT_LIB_243",
"libraryId": null,
"patientId": "C-DRKHP7",
8 changes: 8 additions & 0 deletions fixtures/tests/10075_D_single_TN_pair.filemetadata.json
@@ -19,6 +19,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_5",
"cmoSampleName": "s_C_0CREWW_L013_d",
"barcodeId": "DUAL_IDT_LIB_267",
"libraryId": "10075_D_5_1_1_1",
"patientId": "C-8VK0V7",
@@ -31,6 +32,7 @@
"captureName": "Pool-09483_R-10075_D-Tube7_1",
"igocomplete": true,
"labHeadName": "John Smith",
"dnaInputNg": 12.0,
"barcodeIndex": "GTATTGGC-TTGTCGGT",
"labHeadEmail": "[email protected]",
"oncoTreeCode": "MEL",
@@ -95,6 +97,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_5",
"cmoSampleName": "s_C_0CREWW_L012_d",
"barcodeId": "DUAL_IDT_LIB_267",
"libraryId": null,
"patientId": "C-8VK0V7",
@@ -107,6 +110,7 @@
"captureName": "Pool-09483_R-10075_D-Tube7_1",
"igocomplete": true,
"labHeadName": "John Smith",
"dnaInputNg": 12.0,
"barcodeIndex": "GTATTGGC-TTGTCGGT",
"labHeadEmail": "[email protected]",
"oncoTreeCode": "MEL",
@@ -171,6 +175,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_3",
"cmoSampleName": "s_C_0CREWW_L011_d",
"barcodeId": null,
"libraryId": "10075_D_3",
"patientId": "C-8VK0V7",
@@ -183,6 +188,7 @@
"captureName": "Pool-05257_CD-06287_AY-10075_D-Tube2_1",
"igocomplete": true,
"labHeadName": "John Smith",
"dnaInputNg": 12.0,
"barcodeIndex": null,
"labHeadEmail": "[email protected]",
"oncoTreeCode": null,
@@ -248,6 +254,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_3",
"cmoSampleName": "s_C_0CREWW_L010_d",
"barcodeId": null,
"libraryId": null,
"patientId": "C-8VK0V7",
@@ -260,6 +267,7 @@
"captureName": "Pool-05257_CD-06287_AY-10075_D-Tube2_1",
"igocomplete": true,
"labHeadName": "John Smith",
"dnaInputNg": 12.0,
"barcodeIndex": null,
"labHeadEmail": "[email protected]",
"oncoTreeCode": null,
16 changes: 16 additions & 0 deletions runner/fixtures/runner.pipeline.json
@@ -172,6 +172,22 @@
"default": true
}
},
{
"model": "runner.pipeline",
"pk": "65419097-a2b8-4d57-a8ab-c4c4cddcbffb",
"fields": {
"created_date": "2019-11-18T17:46:45.118Z",
"modified_date": "2019-12-05T01:12:39.854Z",
"name": "sample sheet",
"github": "[email protected]:mskcc/ACCESS-Pipeline",
"version": "ij/output_bam_files_instead_of_directory",
"entrypoint": "cwl_tools/sample_sheet/sample_sheet.cwl",
"output_file_group": "a975f490-1b02-4575-abae-a4f8e3667733",
"output_directory": "/work/access/production/runs/voyager/sample_sheets",
"operator": 10,
"default": true
}
},
{
"model": "runner.pipeline",
"pk": "65419097-a2b8-4d57-a8ab-c4c4cddcbabc",
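A short check, from the Django shell, that the new pipeline fixture is wired up as intended; the field names come straight from the fixture above:

# Inside `python manage.py shell`
from runner.models import Pipeline

pipeline = Pipeline.objects.get(name="sample sheet")
print(pipeline.entrypoint)        # cwl_tools/sample_sheet/sample_sheet.cwl
print(pipeline.output_directory)  # /work/access/production/runs/voyager/sample_sheets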
141 changes: 141 additions & 0 deletions runner/operator/access/v1_0_0/mpath_submitter/__init__.py
@@ -0,0 +1,141 @@
"""
" This operator submits each downstream pipeline to MPath
" An operator trigger, for each downstream pipeline (MSI/CNV/SV/SNV),
" _when all runs are complete_, should be created.
" For additional information on the API and how MPath ACCESS server
" is set-up, see https://app.gitbook.com/@mskcc-1/s/voyager/mpath/
"""
import requests

from beagle.settings import MPATH_URL
from runner.operator.operator import Operator
from runner.models import PortType, Run, RunStatus, File

PIPELINE_NAME_TO_MPATH_TYPE = {
    "access legacy MSI": "admie_microsatellite_instability",
    "access legacy SNV": "snp_indel_variants",
    "access legacy SV": "structural_variants",
    "access legacy CNV": "copy_number_variants",
}

PIPELINE_NAME_TO_MPATH_LOCATION_KEY = {
    "access legacy MSI": "msi_fs_location",
    "access legacy SNV": "snp_fs_location",
    "access legacy SV": "sv_fs_location",
    "access legacy CNV": "cnv_fs_location",
}

PIPELINE_NAME_TO_FILES = {
    "access legacy MSI": ["msi_results.txt"],
    "access legacy SNV": [],  # TODO
    "access legacy SV": [],
    "access legacy CNV": []
}


def get_sample_sheet(request_id, job_group_id):
    sample_sheet_run = Run.objects.filter(
        app__name="sample sheet",
        status=RunStatus.COMPLETED,

        # Filtering on job_group_id would be preferable, but to trigger the
        # MPath Submitter for the massive backlog of requests whose sample
        # sheet was generated in a different job group, we have to filter on
        # request ID instead.
        # To trigger this for the backlog see:
        # https://app.gitbook.com/@mskcc-1/s/voyager/debugging/using-the-django-shell
        # Once the backlog is submitted this should be reverted.

        # job_group_id=job_group_id,

        tags__requestId=request_id
    ).order_by('-created_date').first()

    sample_sheet = File.objects.filter(
        port__run=sample_sheet_run,
        port__port_type=PortType.OUTPUT
    ).first()

    return sample_sheet.path


def juno_path_to_mpath(path):
    # Assumes the path contains exactly one "/voyager" component; everything
    # after it is kept and re-rooted at /voyager for the MPath server.
    [_, p] = path.split("/voyager")
    return "/voyager" + p


# This will return 400 Bad Request if the project already exists.
def submit_project(request_id):
    payload = {
        "data": [
            {
                "comments": "",
                "dmp_alys_task_name": "ACCESSv1-" + request_id,
                "dmp_alys_task_type_cv_id": 7,
                # TODO
                "analyst_cv_id": None,
                "dmp_dms_at_id": None,
                "dmp_dms_id": None,
                "dmp_lims_id": None,
                "fellow_cv_id": None,
                "fs_location": "N/A",
                "is_clinical": 0,
                "pathologist_cv_id": None
            }
        ]
    }

    requests.post(MPATH_URL + "/ngs/projects", json=payload)


def submit_pipeline(request_id, pipeline_name, files, sample_sheet_path):
    mpath_type = PIPELINE_NAME_TO_MPATH_TYPE[pipeline_name]
    location_key = PIPELINE_NAME_TO_MPATH_LOCATION_KEY[pipeline_name]

    data = {
        "dmp_alys_task_name": "ACCESSv1-" + request_id,
        "ss_location": [
            juno_path_to_mpath(sample_sheet_path)
        ],
        # This shouldn't be required. We can't use read-only dirs, so pointing
        # to /voyager does not work. Talk with Anoop about what should be done.
        "fs_location": "/srv",
        "options": [
            mpath_type,
            "samples"
        ],
    }
    data[location_key] = [juno_path_to_mpath(f.path) for f in files]

    payload = {
        "data": [data]
    }

    requests.post(MPATH_URL + "/ngs/", json=payload)


def get_files(pipeline_name, runs):
    return File.objects.filter(
        port__run__in=runs,
        port__run__status=RunStatus.COMPLETED,
        port__port_type=PortType.OUTPUT,
        file_name__in=PIPELINE_NAME_TO_FILES[pipeline_name]
    ).all()


class AccessMPathSubmitter(Operator):
    def get_jobs(self):
        runs = Run.objects.filter(id__in=self.run_ids)
        meta_run = runs[0]

        request_id = meta_run.metadata["requestId"]
        pipeline_name = meta_run.app.name
        job_group_id = meta_run.job_group_id

        sample_sheet_path = get_sample_sheet(request_id, job_group_id)

        files = get_files(pipeline_name, runs)

        submit_project(request_id)
        submit_pipeline(request_id, pipeline_name, files, sample_sheet_path)

        return []
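For reference, a minimal sketch of exercising the helpers above from the Django shell (the same shell the backlog note links to); the request ID, file name, and path are placeholders rather than values from this PR:

# Run inside `python manage.py shell` so beagle.settings and the models are configured.
from runner.operator.access.v1_0_0.mpath_submitter import juno_path_to_mpath, submit_project

# Path translation keeps everything after the single "/voyager" component;
# the directory comes from the pipeline fixture, the file name is a placeholder.
print(juno_path_to_mpath("/work/access/production/runs/voyager/sample_sheets/SampleSheet.csv"))
# -> /voyager/sample_sheets/SampleSheet.csv

# Register a placeholder request as an MPath project. Note this performs a real
# POST to MPATH_URL, and MPath returns 400 Bad Request if the project already exists.
submit_project("10075_D")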