Skip to content

Commit

Permalink
Adding missing dosage curations
Browse files Browse the repository at this point in the history
  • Loading branch information
tnavatar committed Jul 31, 2024
1 parent 59bcb99 commit f77c6d3
Show file tree
Hide file tree
Showing 4 changed files with 211 additions and 10 deletions.
6 changes: 5 additions & 1 deletion resources/base.edn
Original file line number Diff line number Diff line change
Expand Up @@ -152,5 +152,9 @@
{:name "http://dataexchange.clinicalgenome.org/gci-express"
:source "https://raw.githubusercontent.com/clingen-data-model/data-exchange-shared-json/master/json-from-gene-express/gci-express-with-entrez-ids.json"
:target "gci-express-with-entrez-ids.json"
:format :genegraph.gene-validity.base/gci-express}]
:format :genegraph.gene-validity.base/gci-express}
{:name "http://dataexchange.clinicalgenome.org/missing-dosage-curations"
:source "https://raw.githubusercontent.com/clingen-data-model/genegraph-gene-validity/main/resources/missing-dosage-curations.ttl"
:target "missing-dosage-curations.ttl"
:format :genegraph.framework.storage.rdf/turtle}]

8 changes: 4 additions & 4 deletions src/genegraph/gene_validity.clj
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
(def admin-env
(if (or (System/getenv "DX_JAAS_CONFIG_DEV")
(System/getenv "DX_JAAS_CONFIG")) ; prevent this in cloud deployments
{:platform "stage"
{:platform "prod"
:dataexchange-genegraph (System/getenv "DX_JAAS_CONFIG")
:local-data-path "data/"}
{}))
Expand All @@ -54,7 +54,7 @@
(gql-schema/merged-schema
{:executor direct-executor}))}
"dev" (assoc (env/build-environment "522856288592" ["dataexchange-genegraph"])
:version 7
:version 8
:name "dev"
:function (System/getenv "GENEGRAPH_FUNCTION")
:kafka-user "User:2189780"
Expand All @@ -75,7 +75,7 @@
{:executor direct-executor}))
"prod" (assoc (env/build-environment "974091131481" ["dataexchange-genegraph"])
:function (System/getenv "GENEGRAPH_FUNCTION")
:version 5
:version 6
:name "prod"
:kafka-user "User:2592237"
:fs-handle {:type :gcs
Expand Down Expand Up @@ -370,7 +370,7 @@
(def gv-tdb
{:type :rdf
:name :gv-tdb
:snapshot-handle (assoc (:fs-handle env) :path "gv-tdb-v10.nq.gz")
:snapshot-handle (assoc (:fs-handle env) :path "gv-tdb-v11.nq.gz")
:path (str (:local-data-path env) "/gv-tdb")})

(def response-cache-db
Expand Down
73 changes: 70 additions & 3 deletions src/genegraph/gv_setup.clj
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,12 @@
;; (initialized) Genegraph app and creates the topics and necessary
;; permissions for those topics.

;; There are four Genegraph instances that need to be set up to create a
;; There are three Genegraph instances that need to be set up to create a
;; working installation:

;; gv-base-app-def: listens to fetch topic, retrieves base data and notifies gql endpoint
;; gv-transformer-def: Transforms gene validity curations to SEPIO format, publishes to Kafka
;; gv-graphql-endpoint-def: Ingest curations from various sources, publish via GraphQL endpoint
;; gv-appender-def: Append topics that require pre-seeding (gene_validity_raw, gene_validity)
;; with data produced by GCI.

(comment
(run! #(kafka-admin/configure-kafka-for-app! (p/init %))
Expand Down Expand Up @@ -109,6 +107,13 @@
{::event/data %
::event/key (:name %)})))

;; Read the base.edn resource, keep only the entry whose :name is the
;; missing-dosage-curations URL, and publish it to the :fetch-base-events
;; topic of gv-seed-base-event. The event key is the entry's :name and the
;; event data is the entry map itself.
(->> (-> "base.edn" io/resource slurp edn/read-string)
(filter #(= "http://dataexchange.clinicalgenome.org/missing-dosage-curations" (:name %)))
(run! #(p/publish (get-in gv-seed-base-event
[:topics :fetch-base-events])
{::event/data %
::event/key (:name %)})))

(p/stop gv-seed-base-event)
)

Expand Down Expand Up @@ -219,4 +224,66 @@
(event-files "/users/tristan/data/genegraph/2023-11-07T1617/events/:gci-raw-missing-data")))
)

;; Re-initialize sample data in dev

(comment
;; REPL scratch. Builds a producer-only Genegraph app (stanford-app) against
;; the Kafka cluster described by data-exchange, authenticating with the
;; DX_JAAS_CONFIG_DEV environment variable, then republishes events read
;; from local event-store dumps to the gene-validity topics.
;; Forms prefixed with #_ are discarded by the reader and must be enabled
;; by hand before evaluating.

;; NOTE(review): genegraph-dev-user is bound but never referenced in this
;; form -- only stanford-dev-user is used as :kafka-user.
(let [genegraph-dev-user "User:2189780"
stanford-dev-user "User:193111"
data-exchange
{:type :kafka-cluster
;;:kafka-user "User:2189780"
:common-config {"ssl.endpoint.identification.algorithm" "https"
"sasl.mechanism" "PLAIN"
"request.timeout.ms" "20000"
"bootstrap.servers" "pkc-4yyd6.us-east1.gcp.confluent.cloud:9092"
"retry.backoff.ms" "500"
"security.protocol" "SASL_SSL"
"sasl.jaas.config" (System/getenv "DX_JAAS_CONFIG_DEV")}
:consumer-config {"key.deserializer"
"org.apache.kafka.common.serialization.StringDeserializer"
"value.deserializer"
"org.apache.kafka.common.serialization.StringDeserializer"}
:producer-config {"key.serializer"
"org.apache.kafka.common.serialization.StringSerializer"
"value.serializer"
"org.apache.kafka.common.serialization.StringSerializer"}}
;; Both topics are re-typed as :kafka-producer-topic so this app only
;; produces to them.
stanford-app (p/init
{:type :genegraph-app
:kafka-clusters {:data-exchange (assoc data-exchange
:kafka-user
stanford-dev-user)}
:topics {:gene-validity-complete
(assoc gv/gene-validity-complete-topic
:type :kafka-producer-topic
:create-producer true)
:gene-validity-legacy-complete
(assoc gv/gene-validity-legacy-complete-topic
:type :kafka-producer-topic
:create-producer true)}})]

;; Disabled (#_): deletes the two gene-validity topics listed below.
;; Take care to delete only the relevant topics on dev -- not prod!
;; NOTE(review): the catch body evaluates :topic and then returns
;; (:kafka-topic %); run! discards that value, so a failed delete is
;; effectively ignored.
#_(with-open [admin-client (kafka-admin/create-admin-client data-exchange)]
(run! #(try (kafka-admin/delete-topic admin-client
(:kafka-topic %))
(catch Exception e :topic (:kafka-topic %)))
[gv/gene-validity-legacy-complete-topic
gv/gene-validity-complete-topic]))

;; Disabled (#_): Kafka configuration step for stanford-app.
#_(run! #(kafka-admin/configure-kafka-for-app! %)
[stanford-app])
(p/start stanford-app)
;; Disabled (#_): replay of the legacy-complete dump to the
;; :gene-validity-legacy-complete topic.
#_(event-store/with-event-reader [r "/Users/tristan/data/genegraph-neo/gene_validity_legacy_complete-2024-07-30.edn.gz"]
(->> (event-store/event-seq r)
(map event/deserialize)
(run! #(p/publish (get-in stanford-app [:topics :gene-validity-legacy-complete])
%))))
;; Active: deserialize every event in the gene_validity_complete dump and
;; publish it to the :gene-validity-complete topic.
(event-store/with-event-reader [r "/Users/tristan/data/genegraph-neo/gene_validity_complete-2024-07-30.edn.gz"]
(->> (event-store/event-seq r)
(map event/deserialize)
(run! #(p/publish (get-in stanford-app [:topics :gene-validity-complete])
%))))
(p/stop stanford-app))

)

134 changes: 132 additions & 2 deletions src/genegraph/user.clj
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,10 @@
(time (get-events-from-topic gv/gene-validity-complete-topic))
(get-events-from-topic gv/gene-validity-raw-topic)
(time (get-events-from-topic gv/gene-validity-legacy-complete-topic))

(time (get-events-from-topic gv/gene-validity-sepio-topic))

(/ 822646.824791 1000 60)
)

;; Gene Validity Interrogation
Expand Down Expand Up @@ -1080,8 +1084,8 @@ select ?x where {
:body (json/write-str {:query gv-query})}))

(tap>
(set/difference (gene-set stage-gv-result)
(gene-set prod-gv-result)))
(set/difference (gene-set prod-gv-result)
(gene-set stage-gv-result)))

(tap> prod-result)
)
Expand Down Expand Up @@ -1154,4 +1158,130 @@ select ?x where {



)

;; missing new curation
(comment
;; REPL scratch for investigating a single curation (matched by the id in
;; the regex below) across two local event-store dumps.

;; Run event e through the :gene-validity-transform processor of
;; gv-test-app, with a fresh completion promise and with the
;; skip-local-effects / skip-publish-effects flags set on the event.
(defn process-gv-event [e]
(p/process (get-in gv-test-app [:processors :gene-validity-transform])
(assoc e
::event/completion-promise (promise)
::event/skip-local-effects true
::event/skip-publish-effects true)))

;; Events from the gene_validity_complete dump whose ::event/value matches
;; the id regex. Collected with (into []) -- presumably so the sequence is
;; fully realized while the reader is still open; confirm against
;; event-store/with-event-reader.
(def gfpt1
(event-store/with-event-reader [r "/Users/tristan/data/genegraph-neo/gene_validity_complete-2024-07-25.edn.gz"]
(->> (event-store/event-seq r)
(filter #(re-find #"a8f8af21-a5dc-41aa-9bd3-b38c3a98d55c"
(::event/value %)))
(into []))))
;; Timestamp of the first matching event, as a java.time Instant.
(-> gfpt1 first ::event/timestamp Instant/ofEpochMilli)

;; Same id filter applied to the gg-gvs-stage-7 dump.
(def gfpt1-sepio
(event-store/with-event-reader [r "/Users/tristan/data/genegraph-neo/gg-gvs-stage-7-2024-07-25.edn.gz"]
(->> (event-store/event-seq r)
(filter #(re-find #"a8f8af21-a5dc-41aa-9bd3-b38c3a98d55c"
(::event/value %)))
(into []))))
(count gfpt1-sepio)

;; First event in the gene_validity_complete dump.
(def first-curation
(event-store/with-event-reader [r "/Users/tristan/data/genegraph-neo/gene_validity_complete-2024-07-25.edn.gz"]
(->> (event-store/event-seq r)
first)))
;; Process the first matching event and pretty-print its
;; :gene-validity/model; the #_ steps are disabled alternatives.
(-> gfpt1
first
process-gv-event
#_(dissoc :gene-validity/gci-model :gene-validity/model)
#_tap>
:gene-validity/model
rdf/pp-model)

;; NOTE(review): last-curation is not defined anywhere in this block (only
;; first-curation is) -- confirm it is defined elsewhere in the file or the
;; REPL session before evaluating the forms below.
(-> last-curation process-gv-event tap>)



[(-> gfpt1 first ::event/timestamp Instant/ofEpochMilli)
(-> last-curation ::event/timestamp Instant/ofEpochMilli)]

)


(comment
;; REPL scratch: post the same GraphQL queries to the prod and stage
;; endpoints and compare the returned sets.

;; Shared HTTP client; :timeout evaluates to 600000.
;; NOTE(review): :connect-timeout 100 looks short if the unit is
;; milliseconds -- confirm against the hc client library.
(def c (hc/build-http-client {:connect-timeout 100
:redirect-policy :always
:timeout (* 1000 60 10)}))

;; Query for all genes (count plus label/curie of each).
(def genes-query
"
{
genes(curation_activity: ALL, limit: null) {
count
gene_list {
label
curie
}
}
}")
(def prod-result
(hc/post "https://genegraph.prod.clingen.app/api"
{:http-client c
:content-type :json
:body (json/write-str {:query genes-query})}))

(def stage-result
(hc/post "https://genegraph-gene-validity.stage.clingen.app/api"
{:http-client c
:content-type :json
:body (json/write-str {:query genes-query})}))

;; Elements of the prod gene set that are absent from the stage gene set.
;; NOTE(review): gene-set is not defined in this block -- presumably
;; defined earlier in the file; confirm.
(clojure.pprint/pprint
(set/difference (gene-set prod-result)
(gene-set stage-result)))

;; Query for all gene validity assertions (count plus curie and gene of each).
(def gvc-query
"
{
gene_validity_assertions(limit: null) {
count
curation_list {
curie
gene {
curie
label
}
}
}
}")

;; Parse the response :body as JSON with keyword keys and return the set of
;; entries found under :data :gene_validity_assertions :curation_list.
(defn gv-gene-set [result]
(->> (-> result
:body
(json/read-str :key-fn keyword)
:data
:gene_validity_assertions
:curation_list)
set))

(def prod-gv-result
(hc/post "https://genegraph.prod.clingen.app/api"
{:http-client c
:content-type :json
:body (json/write-str {:query gvc-query})}))

(def stage-gv-result
(hc/post "https://genegraph-gene-validity.stage.clingen.app/api"
{:http-client c
:content-type :json
:body (json/write-str {:query gvc-query})}))

;; Curation entries present in the stage result but absent from prod.
(tap>
(set/difference (gv-gene-set stage-gv-result)
(gv-gene-set prod-gv-result)))

(tap> prod-result)
;; Tap the parsed contents of a local copy of the gci-express JSON file.
(tap>
(with-open [r (io/reader "/Users/tristan/code/data-exchange-shared-json/json-from-gene-express/gci-express-with-entrez-ids.json")]
(json/read r :key-fn keyword)))

)

0 comments on commit f77c6d3

Please sign in to comment.