Skip to content

Commit

Permalink
feat: add prefer-deletion-insertion option to coding-dna
Browse files Browse the repository at this point in the history
  • Loading branch information
nokara26 committed Feb 6, 2025
1 parent 4847fa7 commit 5a82362
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 2 deletions.
2 changes: 2 additions & 0 deletions src/varity/hgvs.clj
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
{:prefer-deletion? true}
{:prefer-insertion? false}
{:prefer-insertion? true}
{:prefer-deletion-insertion? false}
{:prefer-deletion-insertion? true}
{:prefer-extension-for-initial-codon-alt? false}
{:prefer-extension-for-initial-codon-alt? true}])

Expand Down
9 changes: 9 additions & 0 deletions src/varity/vcf_to_hgvs.clj
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
;; Default option values for this namespace. Every `prefer-*` flag is off by
;; default, `:tx-margin` defaults to 5000, and `:verbose?` is off.
;; `:prefer-deletion-insertion?` selects a deletion-insertion description
;; (e.g. "c.18_20delATCinsGAT") over an inversion (e.g. "c.18_20inv").
(def ^:private default-options
{:prefer-deletion? false
:prefer-insertion? false
:prefer-deletion-insertion? false
:prefer-extension-for-initial-codon-alt? false
:tx-margin 5000
:verbose? false})
Expand All @@ -91,6 +92,10 @@
:prefer-insertion? Prefer insertion (e.g. \"c.9_10insAGG\") to repeated
sequences (e.g. \"c.4_6[3]\"), default false.
:prefer-deletion-insertion? Prefer indel (e.g. \"c.18_20delATCinsGAT\")
to inversion (e.g. \"c.18_20inv\"),
default false.
:tx-margin The length of transcription margin, up to a maximum of
10000, default 5000.
Expand Down Expand Up @@ -234,6 +239,10 @@
:prefer-insertion? Prefer insertion (e.g. \"c.9_10insAGG\") to repeated
sequences (e.g. \"c.4_6[3]\"), default false.
:prefer-deletion-insertion? Prefer indel (e.g. \"c.18_20delATCinsGAT\")
to inversion (e.g. \"c.18_20inv\"),
default false.
:prefer-extension-for-initial-codon-alt? Prefer extension to protein unknown variant
that affects initial codon, default false.
Expand Down
6 changes: 4 additions & 2 deletions src/varity/vcf_to_hgvs/coding_dna.clj
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
:reverse (repeat-info-backward seq-rdr rg pos alt type))))

(defn- mutation-type
[seq-rdr rg pos ref alt {:keys [prefer-deletion? prefer-insertion?]}]
[seq-rdr rg pos ref alt {:keys [prefer-deletion? prefer-insertion? prefer-deletion-insertion?]}]
(if (re-matches #"[acgntACGNT]*" alt)
(let [[ref-only alt-only offset _] (diff-bases ref alt)
nrefo (count ref-only)
Expand All @@ -79,7 +79,9 @@
(or (= nrefo nalto 0) (= nrefo nalto 1)) :substitution
(and prefer-deletion? (pos? nrefo) (zero? nalto)) :deletion
(and prefer-insertion? (zero? nrefo) (pos? nalto)) :insertion
(= ref-only (util-seq/revcomp alt-only)) :inversion
(= ref-only (util-seq/revcomp alt-only)) (if (and prefer-deletion-insertion? (pos? nrefo) (pos? nalto))
:indel
:inversion)
(and (some? unit) (= ref-repeat 1) (= alt-repeat 2)) :duplication
(and (some? unit) (pos? alt-repeat)
;; In the protein coding region, repeat descriptions are used only
Expand Down
9 changes: 9 additions & 0 deletions test/varity/vcf_to_hgvs_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,15 @@
"chr3" 126492636 "C" "CCTCT" {:prefer-insertion? true} '("NM_001165974:c.1690-121_1690-120insAGAG"
"NM_144639:c.1510-121_1510-120insAGAG")

;; prefer-deletion-insertion?
;; inversion cf. rs267608133 (+)
"chr2" 47806747 "AAAACTTTTTTTTTTTTTTTTTTAA" "ATTAAAAAAAAAAAAAAAAAAGTTT" {:prefer-deletion-insertion? true} '("NM_000179:c.4002-31_4002-8delAAACTTTTTTTTTTTTTTTTTTAAinsTTAAAAAAAAAAAAAAAAAAGTTT"
"NM_001281492:c.3612-31_3612-8delAAACTTTTTTTTTTTTTTTTTTAAinsTTAAAAAAAAAAAAAAAAAAGTTT"
"NM_001281493:c.3096-31_3096-8delAAACTTTTTTTTTTTTTTTTTTAAinsTTAAAAAAAAAAAAAAAAAAGTTT"
"NM_001281494:c.3096-31_3096-8delAAACTTTTTTTTTTTTTTTTTTAAinsTTAAAAAAAAAAAAAAAAAAGTTT"
"NM_025133:c.*1347_*1370delTTAAAAAAAAAAAAAAAAAAGTTTinsAAACTTTTTTTTTTTTTTTTTTAA"
"NM_001190274:c.*1347_*1370delTTAAAAAAAAAAAAAAAAAAGTTTinsAAACTTTTTTTTTTTTTTTTTTAA")

;; tx-margin
"chr5" 1295113 "G" "A" {:tx-margin 5000} '("NM_001193376:c.-124C>T"
"NM_198253:c.-124C>T")
Expand Down

0 comments on commit 5a82362

Please sign in to comment.