opentargets · remo87 · Oct 31, 2024 · Oct 31, 2024 · Oct 31, 2024 · Nov 4, 2024
diff --git a/src/ontoform/transformers/homologue.py b/src/ontoform/transformers/homologue.py
@@ -1,21 +1,53 @@
+import json
+import subprocess
 from pathlib import Path
+from typing import BinaryIO
+from uuid import uuid4
 
 import polars as pl
 
-def transform(src: Path, dst: Path) -> None:
-    # load the homologues
-    initial = pl.read_json(src)
 
-    # prepare node data
-    inputGenes = pl.DataFrame(
-        initial['genes']
-    )
+def transform(src: BinaryIO, dst: BinaryIO) -> None:
+    """Extract gene ids and names from a homologue JSON document as TSV.
+
+    The source is filtered with the external ``jq`` executable, keeping only
+    the ``id`` and ``name`` of each entry under the top-level ``genes`` key.
+    The result is written to ``dst`` as tab-separated values with a header row.
+
+    Args:
+        src: Binary stream holding the source JSON document.
+        dst: Binary stream that receives the tab-separated output.
+
+    Raises:
+        RuntimeError: If ``jq`` exits with a non-zero status.
+    """
+    jq_command = '.genes | {"genes": map({id: .id, name: .name})}'
+
+    # jq is handed a file path, so copy the stream to a temporary file first
+    temp_path = Path(f'/tmp/{uuid4()}.json')
 
-    # extract genes list
-    genes_list = inputGenes.explode('genes').unnest('genes')
+    try:
+        temp_path.write_bytes(src.read())
 
-    # read id and name
-    output = genes_list.select(["id","name"])
+        result = subprocess.run(
+            ['jq', jq_command, temp_path],
+            capture_output=True,
+            text=True,
+        )
+    finally:
+        # always remove the temporary copy, even when writing or jq fails
+        temp_path.unlink(missing_ok=True)
 
-    # write the result
-    output.write_ndjson(dst)
+    # fail loudly rather than silently leaving dst empty
+    if result.returncode != 0:
+        raise RuntimeError(
+            f'jq exited with status {result.returncode}: {result.stderr.strip()}'
+        )
+
+    genes = json.loads(result.stdout)
+    input_genes = pl.DataFrame(genes, strict=False, infer_schema_length=3)
+    # extract genes list
+    genes_list = input_genes.unnest('genes')
+    # read id and name
+    output = genes_list.select(['id', 'name'])
+    output.write_csv(dst, separator='\t', include_header=True)