Merge branch 'master' of https://github.com/uio-bmi/immuneML

uio-bmi · Dec 14, 2021 · cbba5c5 · cbba5c5
2 parents b57bbb6 + f1319bf
commit cbba5c5
Show file tree

Hide file tree

Showing 9 changed files with 71 additions and 24 deletions.
diff --git a/immuneML/api/galaxy/DataSimulationTool.py b/immuneML/api/galaxy/DataSimulationTool.py
@@ -39,11 +39,8 @@ def prepare_specs(self):
         ParameterValidator.assert_keys_present(specs["definitions"], ["datasets"], DataSimulationTool.__name__, "definitions/datasets")
         ParameterValidator.assert_type_and_value(specs['definitions']['datasets'], dict, DataSimulationTool.__name__, "definitions/datasets")
 
-        dataset_names = list(specs['definitions']['datasets'].keys())
-        assert len(dataset_names) == 1, f"{DataSimulationTool.__name__}: one dataset has to be defined under definitions/datasets, got " \
-                                        f"{dataset_names} instead."
-
-        self.dataset_name = dataset_names[0]
+        self.dataset_name = "dataset"
+        Util.update_dataset_key(specs, DataSimulationTool.__name__, self.dataset_name)
 
         Util.check_paths(specs, DataSimulationTool.__name__)
         Util.update_result_paths(specs, self.result_path, self.yaml_path)
diff --git a/immuneML/api/galaxy/DatasetGenerationTool.py b/immuneML/api/galaxy/DatasetGenerationTool.py
@@ -46,6 +46,7 @@ def _update_specs(self):
         self._check_dataset(specs)
         self._check_instruction(specs)
 
+        Util.update_dataset_key(specs, DatasetGenerationTool.__name__)
         Util.check_paths(specs, DatasetGenerationTool.__name__)
         Util.update_result_paths(specs, self.result_path, self.yaml_path)
 

diff --git a/immuneML/api/galaxy/GalaxySimulationTool.py b/immuneML/api/galaxy/GalaxySimulationTool.py
@@ -106,5 +106,6 @@ def prepare_specs(self):
         instruction_name = Util.check_instruction_type(specs, GalaxySimulationTool.__name__, SimulationInstruction.__name__[:-11])
         Util.check_export_format(specs, GalaxySimulationTool.__name__, instruction_name)
 
+        Util.update_dataset_key(specs, GalaxySimulationTool.__name__)
         Util.check_paths(specs, "GalaxySimulationTool")
         Util.update_result_paths(specs, self.result_path, self.yaml_path)
diff --git a/immuneML/api/galaxy/GalaxyYamlTool.py b/immuneML/api/galaxy/GalaxyYamlTool.py
@@ -27,6 +27,7 @@ def update_specs(self):
         with self.yaml_path.open("r") as file:
             specs_dict = yaml.safe_load(file)
 
+        Util.update_dataset_key(specs_dict, GalaxyYamlTool.__name__)
         Util.check_paths(specs_dict, 'GalaxyYamlTool')
         Util.update_result_paths(specs_dict, self.result_path, self.yaml_path)
 
diff --git a/immuneML/api/galaxy/Util.py b/immuneML/api/galaxy/Util.py
@@ -28,6 +28,47 @@ def check_paths(specs: dict, tool_name: str):
             elif isinstance(specs[key], dict):
                 Util.check_paths(specs[key], tool_name)
 
+    @staticmethod
+    def update_dataset_key(specs: dict, location, new_key="dataset"):
+        """
+        Renames the single dataset defined under definitions/datasets to new_key and points every
+        instruction at the renamed dataset; specs is modified in place.
+
+        Args:
+            specs (dict): parsed YAML specification; must contain definitions/datasets
+            location (str): name of the calling tool, used as a prefix in error and log messages
+            new_key (str): key under which the dataset should be available afterwards
+
+        Raises:
+            AssertionError: if definitions/datasets does not contain exactly one dataset
+        """
+        datasets = specs["definitions"]["datasets"]
+        dataset_keys = list(datasets.keys())
+        assert len(dataset_keys) == 1, f"{location}: one dataset has to be defined under definitions/datasets, got {dataset_keys} instead."
+
+        orig_key = dataset_keys[0]
+
+        # compare against new_key, not the literal "dataset": otherwise a custom new_key is ignored when the dataset
+        # is already called "dataset", and a dataset already named new_key would be popped right after being re-assigned
+        if orig_key == new_key:
+            return
+
+        datasets[new_key] = datasets.pop(orig_key)
+
+        # a missing or empty instructions section simply leaves nothing to re-point
+        for instruction in (specs.get("instructions") or {}).values():
+            if "dataset" in instruction:
+                instruction["dataset"] = new_key
+
+            if "datasets" in instruction:
+                instruction["datasets"] = [new_key]
+
+            if "analyses" in instruction:
+                for analysis in instruction["analyses"].values():
+                    analysis["dataset"] = new_key
+
+        logging.info(f"{location}: renamed dataset '{orig_key}' to '{new_key}'.")
+
     @staticmethod
     def update_result_paths(specs: dict, result_path: Path, yaml_path: Path):
         for key, item in specs["definitions"]["datasets"].items():

diff --git a/immuneML/api/galaxy/build_yaml_from_arguments.py b/immuneML/api/galaxy/build_yaml_from_arguments.py
@@ -152,7 +152,7 @@ def build_specs(args):
     specs = {
         "definitions": {
             "datasets": {
-                "d1": {
+                "dataset": {
                     "format": "ImmuneML",
                     "params": None
                 }
@@ -187,7 +187,7 @@ def build_specs(args):
                     "training_percentage": 0.7,
                 },
                 "labels": [],
-                "dataset": "d1",
+                "dataset": "dataset",
                 "strategy": "GridSearch",
                 "metrics": [],
                 "number_of_processes": 10,
@@ -204,7 +204,7 @@ def build_specs(args):
     dataset_params = discover_dataset_params()
     labels = build_labels(args.labels)
 
-    specs["definitions"]["datasets"]["d1"]["params"] = dataset_params
+    specs["definitions"]["datasets"]["dataset"]["params"] = dataset_params
     specs["definitions"]["encodings"] = enc_specs
     specs["definitions"]["ml_methods"] = ml_specs
     specs["instructions"]["inst1"]["settings"] = settings_specs

diff --git a/test/api/galaxy/test_DataSimulationTool.py b/test/api/galaxy/test_DataSimulationTool.py
@@ -48,8 +48,8 @@ def test_run(self):
 
         run_immuneML(Namespace(**{"specification_path": yaml_path, "result_path": result_path, 'tool': "DataSimulationTool"}))
 
-        self.assertTrue(os.path.isfile(result_path / "result/d1_metadata.csv"))
-        self.assertTrue(os.path.isfile(result_path / "result/d1.iml_dataset"))
+        self.assertTrue(os.path.isfile(result_path / "result/dataset_metadata.csv"))
+        self.assertTrue(os.path.isfile(result_path / "result/dataset.iml_dataset"))
         self.assertEqual(200, len([name for name in os.listdir(result_path / "result/repertoires/")
                                    if os.path.isfile(os.path.join(result_path / "result/repertoires/", name))]))
 

diff --git a/test/api/galaxy/test_DatasetGenerationTool.py b/test/api/galaxy/test_DatasetGenerationTool.py
@@ -49,8 +49,8 @@ def test_run(self):
 
         run_immuneML(Namespace(**{"specification_path": yaml_path, "result_path": result_path, 'tool': "DatasetGenerationTool"}))
 
-        self.assertTrue(os.path.isfile(result_path / "result/d1_metadata.csv"))
-        self.assertTrue(os.path.isfile(result_path / "result/d1.iml_dataset"))
+        self.assertTrue(os.path.isfile(result_path / "result/dataset_metadata.csv"))
+        self.assertTrue(os.path.isfile(result_path / "result/dataset.iml_dataset"))
         self.assertEqual(200, len([name for name in os.listdir(result_path / "result/repertoires/")
                                    if os.path.isfile(os.path.join(result_path / "result/repertoires/", name))]))
 

diff --git a/test/api/galaxy/test_GalaxyYamlTool.py b/test/api/galaxy/test_GalaxyYamlTool.py
@@ -13,9 +13,9 @@
 
 
 class TestGalaxyYamlTool(TestCase):
-    def test_run(self):
+    def test_run1(self):
 
-        path = PathBuilder.build(EnvironmentSettings.tmp_test_path / "api_galaxy_yaml_tool/")
+        path = PathBuilder.build(EnvironmentSettings.tmp_test_path / "api_galaxy_yaml_tool1/")
         result_path = path / "result/"
 
         dataset = RandomDatasetGenerator.generate_repertoire_dataset(10, {10: 1}, {12: 1}, {}, result_path)
@@ -30,7 +30,36 @@ def test_run(self):
                         "params": {
                             "metadata_file": str(result_path / "d1_metadata.csv")
                         }
-                    },
+                    }
+                },
+            },
+            "instructions": {
+                "inst1": {
+                    "type": "DatasetExport",
+                    "datasets": ["new_d1"],
+                    "export_formats": ["AIRR"]
+                }
+            }
+        }
+
+        specs_path = path / "specs.yaml"
+        with open(specs_path, "w") as file:
+            yaml.dump(specs, file)
+
+        run_immuneML(Namespace(**{"specification_path": specs_path, "result_path": result_path / 'result/', 'tool': "GalaxyYamlTool"}))
+
+        self.assertTrue(os.path.exists(result_path / "result/inst1/dataset/AIRR"))
+
+        shutil.rmtree(path)
+
+    def test_run2(self):
+
+        path = PathBuilder.build(EnvironmentSettings.tmp_test_path / "api_galaxy_yaml_tool2/")
+        result_path = path / "result/"
+
+        specs = {
+            "definitions": {
+                "datasets": {
                     "d2": {
                         "format": "RandomRepertoireDataset",
                         "params": {
@@ -73,11 +102,6 @@ def test_run(self):
                 },
             },
             "instructions": {
-                "inst1": {
-                    "type": "DatasetExport",
-                    "datasets": ["new_d1", 'd2'],
-                    "export_formats": ["AIRR"]
-                },
                 "inst2": {
                     "type": "TrainMLModel",
                     "settings": [
@@ -116,10 +140,9 @@ def test_run(self):
         with open(specs_path, "w") as file:
             yaml.dump(specs, file)
 
-        run_immuneML(Namespace(**{"specification_path": specs_path, "result_path": result_path / 'result/', 'tool': "GalaxyYamlTool"}))
+        run_immuneML(Namespace(**{"specification_path": specs_path, "result_path": result_path, 'tool': "GalaxyYamlTool"}))
 
-        self.assertTrue(os.path.exists(result_path / "result/inst1/new_d1/AIRR"))
-        self.assertTrue(os.path.exists(result_path / "result/inst1/d2/AIRR"))
-        self.assertTrue(os.path.exists(result_path / "result/d2"))
+        self.assertTrue(os.path.exists(result_path / "dataset/"))
+        self.assertTrue(os.path.exists(result_path / "inst2/"))
 
         shutil.rmtree(path)