Skip to content

Commit

Permalink
fix: add in-frame check process to indel for around ter site variant
Browse files Browse the repository at this point in the history
  • Loading branch information
nokara26 committed Feb 4, 2025
1 parent 4847fa7 commit 2f5fc0d
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 3 deletions.
50 changes: 47 additions & 3 deletions src/varity/vcf_to_hgvs/protein.clj
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,22 @@
(and (= pos ter-start-pos) (<= ter-end-pos pos-end))
(and (= pos-end ter-start-pos) (<= pos ter-end-pos)))))

(defn- ref-include-from-ter-upstream-and-over-ter-end?
[{:keys [strand cds-start cds-end]} pos ref alt]
(let [[del _ offset _] (diff-bases ref alt)
pos (+ pos offset)
ndel (count del)
ter-start-pos (if (= strand :forward)
(- cds-end 2)
(+ cds-start 2))
ter-end-pos (if (= strand :forward)
cds-end
cds-start)
pos-end (+ pos (if (= ndel 0) 0 (dec ndel)))]
(if (= strand :forward)
(and (< pos ter-start-pos) (< ter-end-pos pos-end))
(and (< ter-start-pos pos-end) (< pos ter-end-pos)))))

(defn- ter-site-same-pos?
[ref-prot-seq alt-prot-seq]
(and (string/includes? ref-prot-seq "*")
Expand Down Expand Up @@ -260,6 +276,25 @@
:else
pos-start*)))

(defn- in-frame?
[pos ref alt {:keys [cds-start cds-end strand] :as _rg}]
(let [[del ins offset] (diff-bases ref alt)
ndel (count del)
nins (count ins)
pos* (+ pos offset)
pos-end (+ pos offset (dec ndel))
over-ter-site? (if (= strand :forward)
(< pos cds-end pos-end)
(< pos cds-start pos-end))
ndel-to-cds-end (if (= strand :forward)
(inc (- cds-end pos*))
(inc (- pos-end cds-start)))
ndel* (if over-ter-site?
ndel-to-cds-end
ndel)]
(or (= ndel nins 1)
(= 0 (rem (- ndel* nins) 3)))))

(defn- apply-offset
[pos ref alt cds-start cds-end exon-ranges pos*]
(let [[del ins offset _] (diff-bases ref alt)
Expand Down Expand Up @@ -288,6 +323,7 @@
ref-include-utr-ini-site-boundary (include-utr-ini-site-boundary? rg pos ref alt)
ref-include-ter-site (include-ter-site? rg pos ref alt)
ref-include-from-ter-start-and-over-ter-end (ref-include-from-ter-start-and-over-ter-end? rg pos ref alt)
ref-include-from-ter-upstream-and-over-ter-end (ref-include-from-ter-upstream-and-over-ter-end? rg pos ref alt)
frameshift-within-cds (frameshift-within-cds? rg pos ref alt)
alt-seq (common/alt-sequence ref-seq tx-start pos ref alt)
alt-exon-ranges* (alt-exon-ranges exon-ranges pos ref alt)
Expand All @@ -301,7 +337,8 @@
alt-up-exon-seq (make-alt-up-exon-seq alt-up-exon-seq tx-start (dec alt-cds-start) alt-exon-ranges* strand)
alt-down-exon-seq (make-alt-down-exon-seq alt-down-exon-seq (inc alt-cds-end) alt-tx-end alt-exon-ranges* strand)
ter-site-adjusted-alt-seq (make-ter-site-adjusted-alt-seq alt-cds-exon-seq alt-up-exon-seq alt-down-exon-seq
strand cds-start cds-end pos ref ref-include-ter-site)]
strand cds-start cds-end pos ref ref-include-ter-site)
in-frame (in-frame? pos ref alt rg)]
{:ref-exon-seq ref-cds-exon-seq
:ref-prot-seq (codon/amino-acid-sequence (cond-> ref-cds-exon-seq
(= strand :reverse) util-seq/revcomp))
Expand All @@ -327,8 +364,10 @@
:ref-include-utr-ini-site-boundary ref-include-utr-ini-site-boundary
:ref-include-ter-site ref-include-ter-site
:ref-include-from-ter-start-and-over-ter-end ref-include-from-ter-start-and-over-ter-end
:ref-include-from-ter-upstream-and-over-ter-end ref-include-from-ter-upstream-and-over-ter-end
:frameshift-within-cds frameshift-within-cds
:utr-variant (utr-variant? cds-start cds-end pos ref alt)})))
:utr-variant (utr-variant? cds-start cds-end pos ref alt)
:in-frame in-frame})))

(defn- protein-position
"Converts genomic position to protein position. If pos is outside of CDS,
Expand Down Expand Up @@ -621,7 +660,8 @@

(defn- protein-indel
[ppos pref palt {:keys [ref-prot-seq c-ter-adjusted-alt-prot-seq
ref-include-ter-site frameshift-within-cds] :as seq-info}]
ref-include-ter-site frameshift-within-cds
ref-include-from-ter-upstream-and-over-ter-end in-frame] :as seq-info}]
(let [[pref* palt* ppos*] (if ref-include-ter-site
(let [{adjusted-ppos :ppos} (get-first-diff-aa-info ppos ref-prot-seq c-ter-adjusted-alt-prot-seq)
ppos (or adjusted-ppos ppos)
Expand Down Expand Up @@ -658,6 +698,10 @@

(empty? ins)
(protein-deletion ppos* pref* palt*)

(and ref-include-from-ter-upstream-and-over-ter-end
(not in-frame))
(protein-frame-shift ppos* seq-info)

alt-retain-ter-site?
(mut/protein-indel (mut/->long-amino-acid (first del))
Expand Down
57 changes: 57 additions & 0 deletions test/varity/vcf_to_hgvs/protein_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,28 @@
false? :reverse 11 "C" "CATG"
false? :reverse 8 "CGTC" "C")))

(deftest ref-include-from-ter-upstream-and-over-ter-end?-test
(let [cds-start 10
cds-end 21]
(are [p strand pos ref alt] (p (#'prot/ref-include-from-ter-upstream-and-over-ter-end? {:strand strand
:cds-start cds-start
:cds-end cds-end}
pos
ref
alt))
true? :forward 17 "AATAAG" "A"
true? :forward 17 "AATAAG" "AGC"
true? :reverse 8 "CGTCAC" "C"
true? :reverse 8 "CGTCAC" "CTG"
false? :forward 17 "AATAA" "A"
false? :forward 18 "A" "T"
false? :forward 19 "T" "TCCCT"
false? :forward 18 "ATA" "A"
false? :reverse 9 "GTCAC" "G"
false? :reverse 10 "T" "A"
false? :reverse 11 "C" "CATG"
false? :reverse 8 "CGTC" "C")))

(deftest ter-site-same-pos?-test
(are [p ref alt] (p (#'prot/ter-site-same-pos? ref alt))
true? "MTGA*" "MTGA*"
Expand Down Expand Up @@ -256,6 +278,41 @@
100 115 99
75 110 50)))

(deftest in-frame?-test
(let [in-frame? (fn [pos ref alt strand] (#'prot/in-frame? pos ref alt {:cds-start 101
:cds-end 300
:strand strand}))]
(testing "within cds"
(are [pred pos ref alt strand] (pred (in-frame? pos ref alt strand))
true? 200 "A" "T" :forward
true? 200 "AGGC" "A" :forward
true? 200 "A" "ATCG" :forward
true? 200 "AGT" "ACCCTG" :forward
true? 150 "T" "A" :reverse
true? 150 "GCTC" "G" :reverse
true? 150 "T" "TCCG" :reverse
true? 150 "TGG" "TCCAAC" :reverse
false? 200 "AGG" "A" :forward
false? 200 "A" "ATC" :forward
false? 200 "AGT" "ACCCT" :forward
false? 150 "GCT" "G" :reverse
false? 150 "T" "TCC" :reverse
false? 150 "TGG" "TCCAA" :reverse))
(testing "over ter site"
(are [pred pos ref alt strand] (pred (in-frame? pos ref alt strand))
true? 294 "CAGTTGAAG" "C" :forward
true? 294 "CAGTTGAAG" "CGTC" :forward
true? 296 "GTTGAAG" "GCCAA" :forward
false? 295 "AGTTGAAG" "A" :forward
false? 295 "AGTTGAAG" "ACTC" :forward
false? 294 "CAGTTGAAG" "CGT" :forward
true? 99 "GTTTACGA" "G" :reverse
true? 99 "GTTTACGA" "GCCT" :reverse
true? 99 "GTTTACGAC" "GA" :reverse
false? 99 "GTTTACG" "G" :reverse
false? 96 "CAAGTTTACG" "C" :reverse
false? 99 "GTTTACGA" "GAT" :reverse))))

(deftest apply-offset-test
(testing "ref not include exon terminal"
(let [ref "GCTGACC"
Expand Down
1 change: 1 addition & 0 deletions test/varity/vcf_to_hgvs_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@
"chr10" 87965466 "AGTCT" "A" '("p.V403Efs*12" "p.V576Efs*12" "p.V206Efs*12") ; not actual example (+)
"chr10" 87965465 "AAGTCT" "A" '("p.V403Nfs*17" "p.V576Nfs*17" "p.V206Nfs*17") ; not actual example (+)
"chr10" 87965463 "AAAAGTCT" "A" '("p.K402Efs*12" "p.K575Efs*12" "p.K205Efs*12") ; not actual example (+)
"chr13" 24421117 "ACTTAGC" "A" '("p.G1724Vfs*3") ; not actual example (-)

;; Extension
"chr2" 189011772 "T" "C" '("p.*1467Qext*45") ; cf. ClinVar 101338
Expand Down

0 comments on commit 2f5fc0d

Please sign in to comment.