Merge branch 'dev' of https://github.com/opentargets/gentropy into xg1_l2g_isProteinCoding
opentargets · Oct 25, 2024 · 4f02e16 · 4f02e16
2 parents 7213b51 + 3e61996
commit 4f02e16
Show file tree

Hide file tree

Showing 2 changed files with 13 additions and 6 deletions.
diff --git a/src/gentropy/dataset/l2g_features/distance.py b/src/gentropy/dataset/l2g_features/distance.py
@@ -64,7 +64,10 @@ def common_distance_feature_logic(
             on="variantId",
             how="inner",
         )
-        .withColumn("distance_score", f.log10(distance_score_expr))
+        .withColumn(
+            "distance_score",
+            f.log10(distance_score_expr) / f.log10(f.lit(genomic_window + 1)),
+        )
         .groupBy("studyLocusId", "geneId")
         .agg(agg_expr.alias(feature_name))
     )
@@ -105,7 +108,11 @@ def common_neighbourhood_distance_feature_logic(
             "regional_metric",
             f.mean(f.col(local_feature_name)).over(Window.partitionBy("studyLocusId")),
         )
-        .withColumn(feature_name, f.col(local_feature_name) - f.col("regional_metric"))
+        .withColumn(
+            feature_name,
+            (f.col(local_feature_name) - f.col("regional_metric"))
+            / f.log10(f.lit(genomic_window + 1)),
+        )
         .drop("regional_metric", local_feature_name)
     )
 

diff --git a/tests/gentropy/dataset/test_l2g_feature.py b/tests/gentropy/dataset/test_l2g_feature.py
@@ -506,15 +506,15 @@ class TestCommonDistanceFeatureLogic:
                     {
                         "studyLocusId": "1",
                         "geneId": "gene2",
-                        "distanceSentinelTss": 0.95,
+                        "distanceSentinelTss": 0.92,
                     },
                 ],
             ),
             (
                 "distanceTssMean",
                 [
-                    {"studyLocusId": "1", "geneId": "gene1", "distanceTssMean": 0.09},
-                    {"studyLocusId": "1", "geneId": "gene2", "distanceTssMean": 0.65},
+                    {"studyLocusId": "1", "geneId": "gene1", "distanceTssMean": 0.08},
+                    {"studyLocusId": "1", "geneId": "gene2", "distanceTssMean": 0.63},
                 ],
             ),
         ],
@@ -569,7 +569,7 @@ def test_common_neighbourhood_distance_feature_logic(
             .orderBy(f.col(feature_name).asc())
         )
         expected_df = spark.createDataFrame(
-            (["1", "gene1", -0.48], ["1", "gene2", 0.48]),
+            (["1", "gene1", -0.44], ["1", "gene2", 0.44]),
             ["studyLocusId", "geneId", feature_name],
         ).orderBy(feature_name)
         assert (