Skip to content

Commit

Permalink
Merge branch 'dev' of https://github.com/opentargets/gentropy into xg1_l2g_isProteinCoding
Browse files Browse the repository at this point in the history
  • Loading branch information
xyg123 committed Oct 25, 2024
2 parents 7213b51 + 3e61996 commit 4f02e16
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 6 deletions.
11 changes: 9 additions & 2 deletions src/gentropy/dataset/l2g_features/distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,10 @@ def common_distance_feature_logic(
on="variantId",
how="inner",
)
- .withColumn("distance_score", f.log10(distance_score_expr))
+ .withColumn(
+ "distance_score",
+ f.log10(distance_score_expr) / f.log10(f.lit(genomic_window + 1)),
+ )
.groupBy("studyLocusId", "geneId")
.agg(agg_expr.alias(feature_name))
)
Expand Down Expand Up @@ -105,7 +108,11 @@ def common_neighbourhood_distance_feature_logic(
"regional_metric",
f.mean(f.col(local_feature_name)).over(Window.partitionBy("studyLocusId")),
)
- .withColumn(feature_name, f.col(local_feature_name) - f.col("regional_metric"))
+ .withColumn(
+ feature_name,
+ (f.col(local_feature_name) - f.col("regional_metric"))
+ / f.log10(f.lit(genomic_window + 1)),
+ )
.drop("regional_metric", local_feature_name)
)

Expand Down
8 changes: 4 additions & 4 deletions tests/gentropy/dataset/test_l2g_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,15 +506,15 @@ class TestCommonDistanceFeatureLogic:
{
"studyLocusId": "1",
"geneId": "gene2",
- "distanceSentinelTss": 0.95,
+ "distanceSentinelTss": 0.92,
},
],
),
(
"distanceTssMean",
[
- {"studyLocusId": "1", "geneId": "gene1", "distanceTssMean": 0.09},
- {"studyLocusId": "1", "geneId": "gene2", "distanceTssMean": 0.65},
+ {"studyLocusId": "1", "geneId": "gene1", "distanceTssMean": 0.08},
+ {"studyLocusId": "1", "geneId": "gene2", "distanceTssMean": 0.63},
],
),
],
Expand Down Expand Up @@ -569,7 +569,7 @@ def test_common_neighbourhood_distance_feature_logic(
.orderBy(f.col(feature_name).asc())
)
expected_df = spark.createDataFrame(
- (["1", "gene1", -0.48], ["1", "gene2", 0.48]),
+ (["1", "gene1", -0.44], ["1", "gene2", 0.44]),
["studyLocusId", "geneId", feature_name],
).orderBy(feature_name)
assert (
Expand Down

0 comments on commit 4f02e16

Please sign in to comment.