Operator to generate inputs for sample sheets #848

Open
wants to merge 15 commits into base: develop
2 changes: 2 additions & 0 deletions beagle/settings.py
@@ -322,6 +322,8 @@

RIDGEBACK_URL = os.environ.get('BEAGLE_RIDGEBACK_URL', 'http://localhost:5003')

MPATH_URL = os.environ.get('BEAGLE_MPATH_URL', 'http://localhost:7331')

LOG_PATH = os.environ.get('BEAGLE_LOG_PATH', 'beagle-server.log')

LOGGING = {
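The new MPATH_URL setting follows the same env-override pattern as the other service URLs in settings.py. A quick sanity check from a plain Python shell (standard library only; the endpoint paths are the ones the MPath submitter later in this PR posts to):

import os

# Same lookup as the new settings.py line: environment override with a
# localhost default.
mpath_url = os.environ.get('BEAGLE_MPATH_URL', 'http://localhost:7331')

# The MPath submitter added in this PR posts to these endpoints.
print(mpath_url + "/ngs/projects")
print(mpath_url + "/ngs/")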
12 changes: 12 additions & 0 deletions beagle_etl/fixtures/beagle_etl.operator.json
@@ -119,5 +119,17 @@
"version": "v2.0.0",
"slug": "AccessQCOperator"
}
},
{
"model": "beagle_etl.operator",
"pk": 12,
"fields": {
"active": true,
"recipes": "[\"None\"]",
"class_name": "runner.operator.access.v1_0_0.sample_sheet.AccessSampleSheetOperator",
"version": "v1.0.0",
"slug": "AccessSampleSheetOperator"
}
}

]
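If the operator registry needs to be refreshed manually, Django's standard loaddata command can apply this fixture; whether the deployment actually applies fixtures this way is an assumption, so treat the snippet as a sketch:

from django.core.management import call_command

# Reload the operator fixture so the new AccessSampleSheetOperator entry (pk 12)
# is present; assumes fixtures are applied with Django's built-in loaddata.
call_command("loaddata", "beagle_etl/fixtures/beagle_etl.operator.json")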
10 changes: 10 additions & 0 deletions fixtures/tests/10075_D.filemetadata.json
@@ -18,6 +18,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_5",
"cmoSampleName": "s_C_0CREWW_L013_d",
"barcodeId": "DUAL_IDT_LIB_267",
"libraryId": "10075_D_5_1_1_1",
"patientId": "C-8VK0V7",
@@ -93,6 +94,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_4",
"cmoSampleName": "s_C_0CREWW_L012_d",
"barcodeId": "DUAL_IDT_LIB_255",
"libraryId": "10075_D_4_1_1_1",
"patientId": "C-DRKHP7",
@@ -168,6 +170,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_2",
"cmoSampleName": "s_C_0CREWW_L011_d",
"barcodeId": null,
"libraryId": "10075_D_2",
"patientId": "C-DRKHP7",
@@ -244,6 +247,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_3",
"cmoSampleName": "s_C_0CREWW_L010_d",
"barcodeId": null,
"libraryId": "10075_D_3",
"patientId": "C-8VK0V7",
@@ -320,6 +324,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_4",
"cmoSampleName": "s_C_0CREWW_L010_d",
"barcodeId": "DUAL_IDT_LIB_255",
"libraryId": null,
"patientId": "C-DRKHP7",
@@ -395,6 +400,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_2",
"cmoSampleName": "s_C_0CREWW_L009_d",
"barcodeId": null,
"libraryId": null,
"patientId": "C-DRKHP7",
@@ -471,6 +477,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_3",
"cmoSampleName": "s_C_0CREWW_L008_d",
"barcodeId": null,
"libraryId": null,
"patientId": "C-8VK0V7",
@@ -547,6 +554,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_5",
"cmoSampleName": "s_C_0CREWW_L007_d",
"barcodeId": "DUAL_IDT_LIB_267",
"libraryId": null,
"patientId": "C-8VK0V7",
@@ -622,6 +630,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_1",
"cmoSampleName": "s_C_0CREWW_L006_d",
"barcodeId": "DUAL_IDT_LIB_243",
"libraryId": "10075_D_1_1_1_1",
"patientId": "C-DRKHP7",
@@ -697,6 +706,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_1",
"cmoSampleName": "s_C_0CREWW_L005_d",
"barcodeId": "DUAL_IDT_LIB_243",
"libraryId": null,
"patientId": "C-DRKHP7",
8 changes: 8 additions & 0 deletions fixtures/tests/10075_D_single_TN_pair.filemetadata.json
@@ -19,6 +19,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_5",
"cmoSampleName": "s_C_0CREWW_L013_d",
"barcodeId": "DUAL_IDT_LIB_267",
"libraryId": "10075_D_5_1_1_1",
"patientId": "C-8VK0V7",
@@ -31,6 +32,7 @@
"captureName": "Pool-09483_R-10075_D-Tube7_1",
"igocomplete": true,
"labHeadName": "John Smith",
"dnaInputNg": 12.0,
"barcodeIndex": "GTATTGGC-TTGTCGGT",
"labHeadEmail": "[email protected]",
"oncoTreeCode": "MEL",
@@ -95,6 +97,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_5",
"cmoSampleName": "s_C_0CREWW_L012_d",
"barcodeId": "DUAL_IDT_LIB_267",
"libraryId": null,
"patientId": "C-8VK0V7",
@@ -107,6 +110,7 @@
"captureName": "Pool-09483_R-10075_D-Tube7_1",
"igocomplete": true,
"labHeadName": "John Smith",
"dnaInputNg": 12.0,
"barcodeIndex": "GTATTGGC-TTGTCGGT",
"labHeadEmail": "[email protected]",
"oncoTreeCode": "MEL",
@@ -171,6 +175,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_3",
"cmoSampleName": "s_C_0CREWW_L011_d",
"barcodeId": null,
"libraryId": "10075_D_3",
"patientId": "C-8VK0V7",
@@ -183,6 +188,7 @@
"captureName": "Pool-05257_CD-06287_AY-10075_D-Tube2_1",
"igocomplete": true,
"labHeadName": "John Smith",
"dnaInputNg": 12.0,
"barcodeIndex": null,
"labHeadEmail": "[email protected]",
"oncoTreeCode": null,
@@ -248,6 +254,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_3",
"cmoSampleName": "s_C_0CREWW_L010_d",
"barcodeId": null,
"libraryId": null,
"patientId": "C-8VK0V7",
@@ -260,6 +267,7 @@
"captureName": "Pool-05257_CD-06287_AY-10075_D-Tube2_1",
"igocomplete": true,
"labHeadName": "John Smith",
"dnaInputNg": 12.0,
"barcodeIndex": null,
"labHeadEmail": "[email protected]",
"oncoTreeCode": null,
16 changes: 16 additions & 0 deletions runner/fixtures/runner.pipeline.json
@@ -172,6 +172,22 @@
"default": true
}
},
{
"model": "runner.pipeline",
"pk": "65419097-a2b8-4d57-a8ab-c4c4cddcbffb",
"fields": {
"created_date": "2019-11-18T17:46:45.118Z",
"modified_date": "2019-12-05T01:12:39.854Z",
"name": "sample sheet",
"github": "[email protected]:mskcc/ACCESS-Pipeline",
"version": "ij/output_bam_files_instead_of_directory",
"entrypoint": "cwl_tools/sample_sheet/sample_sheet.cwl",
"output_file_group": "a975f490-1b02-4575-abae-a4f8e3667733",
"output_directory": "/work/access/production/runs/voyager/sample_sheets",
"operator": 10,
"default": true
}
},
{
"model": "runner.pipeline",
"pk": "65419097-a2b8-4d57-a8ab-c4c4cddcbabc",
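A short check, from the Django shell, that the new pipeline fixture is wired up as intended; the field names come straight from the fixture above:

# Inside `python manage.py shell`
from runner.models import Pipeline

pipeline = Pipeline.objects.get(name="sample sheet")
print(pipeline.entrypoint)        # cwl_tools/sample_sheet/sample_sheet.cwl
print(pipeline.output_directory)  # /work/access/production/runs/voyager/sample_sheets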
141 changes: 141 additions & 0 deletions runner/operator/access/v1_0_0/mpath_submitter/__init__.py
@@ -0,0 +1,141 @@
"""
" This operator submits each downstream pipeline to MPath
" An operator trigger, for each downstream pipeline (MSI/CNV/SV/SNV),
" _when all runs are complete_, should be created.
" For additional information on the API and how MPath ACCESS server
" is set-up, see https://app.gitbook.com/@mskcc-1/s/voyager/mpath/
"""
import requests

from beagle.settings import MPATH_URL
from runner.operator.operator import Operator
from runner.models import PortType, Run, RunStatus, File

PIPELINE_NAME_TO_MPATH_TYPE = {
    "access legacy MSI": "admie_microsatellite_instability",
    "access legacy SNV": "snp_indel_variants",
    "access legacy SV": "structural_variants",
    "access legacy CNV": "copy_number_variants",
}

PIPELINE_NAME_TO_MPATH_LOCATION_KEY = {
    "access legacy MSI": "msi_fs_location",
    "access legacy SNV": "snp_fs_location",
    "access legacy SV": "sv_fs_location",
    "access legacy CNV": "cnv_fs_location",
}

PIPELINE_NAME_TO_FILES = {
    "access legacy MSI": ["msi_results.txt"],
    "access legacy SNV": [],  # TODO
    "access legacy SV": [],
    "access legacy CNV": []
}


def get_sample_sheet(request_id, job_group_id):
    sample_sheet_run = Run.objects.filter(
        app__name="sample sheet",
        status=RunStatus.COMPLETED,

        # Filtering on job_group_id would be preferable, but to trigger the
        # MPath Submitter for the massive backlog of requests whose sample
        # sheet was generated in a different job group, we have to filter on
        # request ID instead.
        # To trigger this for the backlog see:
        # https://app.gitbook.com/@mskcc-1/s/voyager/debugging/using-the-django-shell
        # Once the backlog is submitted this should be reverted.

        # job_group_id=job_group_id,

        tags__requestId=request_id
    ).order_by('-created_date').first()

    sample_sheet = File.objects.filter(
        port__run=sample_sheet_run,
        port__port_type=PortType.OUTPUT
    ).first()

    return sample_sheet.path


def juno_path_to_mpath(path):
    # Assumes the path contains exactly one "/voyager" component; everything
    # after it is kept and re-rooted at /voyager for the MPath server.
    [_, p] = path.split("/voyager")
    return "/voyager" + p


# This will return 400 Bad Request if the project already exists.
def submit_project(request_id):
    payload = {
        "data": [
            {
                "comments": "",
                "dmp_alys_task_name": "ACCESSv1-" + request_id,
                "dmp_alys_task_type_cv_id": 7,
                # TODO
                "analyst_cv_id": None,
                "dmp_dms_at_id": None,
                "dmp_dms_id": None,
                "dmp_lims_id": None,
                "fellow_cv_id": None,
                "fs_location": "N/A",
                "is_clinical": 0,
                "pathologist_cv_id": None
            }
        ]
    }

    requests.post(MPATH_URL + "/ngs/projects", json=payload)


def submit_pipeline(request_id, pipeline_name, files, sample_sheet_path):
    mpath_type = PIPELINE_NAME_TO_MPATH_TYPE[pipeline_name]
    location_key = PIPELINE_NAME_TO_MPATH_LOCATION_KEY[pipeline_name]

    data = {
        "dmp_alys_task_name": "ACCESSv1-" + request_id,
        "ss_location": [
            juno_path_to_mpath(sample_sheet_path)
        ],
        # This shouldn't be required. We can't use read-only dirs, so pointing
        # to /voyager does not work. Talk with Anoop about what should be done.
        "fs_location": "/srv",
        "options": [
            mpath_type,
            "samples"
        ],
    }
    data[location_key] = [juno_path_to_mpath(f.path) for f in files]

    payload = {
        "data": [data]
    }

    requests.post(MPATH_URL + "/ngs/", json=payload)


def get_files(pipeline_name, runs):
    return File.objects.filter(
        port__run__in=runs,
        port__run__status=RunStatus.COMPLETED,
        port__port_type=PortType.OUTPUT,
        file_name__in=PIPELINE_NAME_TO_FILES[pipeline_name]
    ).all()


class AccessMPathSubmitter(Operator):
    def get_jobs(self):
        runs = Run.objects.filter(id__in=self.run_ids)
        meta_run = runs[0]

        request_id = meta_run.metadata["requestId"]
        pipeline_name = meta_run.app.name
        job_group_id = meta_run.job_group_id

        sample_sheet_path = get_sample_sheet(request_id, job_group_id)

        files = get_files(pipeline_name, runs)

        submit_project(request_id)
        submit_pipeline(request_id, pipeline_name, files, sample_sheet_path)

        return []
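For reference, a minimal sketch of exercising the helpers above from the Django shell (the same shell the backlog note links to); the request ID, file name, and path are placeholders rather than values from this PR:

# Run inside `python manage.py shell` so beagle.settings and the models are configured.
from runner.operator.access.v1_0_0.mpath_submitter import juno_path_to_mpath, submit_project

# Path translation keeps everything after the single "/voyager" component;
# the directory comes from the pipeline fixture, the file name is a placeholder.
print(juno_path_to_mpath("/work/access/production/runs/voyager/sample_sheets/SampleSheet.csv"))
# -> /voyager/sample_sheets/SampleSheet.csv

# Register a placeholder request as an MPath project. Note this performs a real
# POST to MPATH_URL, and MPath returns 400 Bad Request if the project already exists.
submit_project("10075_D")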