Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PoC GTEx tissue scores #595

Draft
wants to merge 27 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
!/resources/README.txt
!/resources/decision_tree_*.json
/resources/vep/cache
resources/GTEx/GTEx_Analysis_2017-06-05_v8_RSEMv1.3.0_transcript_tpm.gct
!/resources/vep/plugins
!/resources/*.chain
!/resources/*.chain.gz
Expand All @@ -33,4 +34,4 @@ nextflow-*-all
/vip

# mkdocs
/site
/site
7 changes: 5 additions & 2 deletions config/nxf_vcf.config
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ params {
vep_plugin_inheritance = "${projectDir}/resources/inheritance_20240115.tsv"
vep_plugin_vkgl_mode = 1

vep_plugin_gtex = "${projectDir}/resources/GTEx/GTEx_Analysis_2017-06-05_v8_RSEMv1.3.0_transcript_tpm.gct"

GRCh38 {
capice_model = "${projectDir}/resources/GRCh38/capice_model_v5.1.2-v2.ubj"
expansionhunter_variant_catalog = "${projectDir}/resources/GRCh38/expansionhunter_variant_catalog.json"
Expand All @@ -83,7 +85,8 @@ params {
metadata = "${projectDir}/resources/field_metadata.json"

GRCh38 {
decision_tree = "${projectDir}/resources/decision_tree_GRCh38.json"
//decision_tree = "${projectDir}/resources/decision_tree_GRCh38.json"
decision_tree = "${projectDir}/resources/PoC/decision_tree_GRCh38_Tissues.json"
}
}

Expand All @@ -109,7 +112,7 @@ params {
include_crams = true
max_records = ""
max_samples = ""
template = ""
template = "${projectDir}/resources/PoC/index.html"
metadata = "${projectDir}/resources/field_metadata.json"

GRCh38 {
Expand Down
12 changes: 12 additions & 0 deletions install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ download_files() {
for ((i = 0; i < ${#urls[@]}; i += 2)); do
download_file "${base_url}" "${urls[i+1]}" "${urls[i+0]}" "${output_dir}" "${validate}"
done
download_file "https://ftp.ensembl.org/pub/release-111/variation/indexed_vep_cache" "homo_sapiens_vep_111_GRCh38.tar.gz" "FIXME" "${output_dir}/resources/vep/cache/" "false"
download_file "https://storage.googleapis.com/adult-gtex/bulk-gex/v8/rna-seq" "GTEx_Analysis_2017-06-05_v8_RSEMv1.3.0_transcript_tpm.gct.gz" "FIXME" "${output_dir}/resources/GTEx" "false"

}

extract_files() {
Expand All @@ -162,7 +165,16 @@ extract_files() {
echo -e "extracting ${vep_gz} ..."
tar -xzf "${vep_gz}" -C "${vep_dir}"
fi
if [ ! -d "${vep_dir}/homo_sapiens/111_GRCh38" ]; then
local -r vep_gz="${vep_dir}/homo_sapiens_vep_111_GRCh38.tar.gz"
echo -e "extracting ${vep_gz} ..."
tar -xzf "${vep_gz}" -C "${vep_dir}"
fi

if [ ! -f "${output_dir}/resources/GTEx/GTEx_Analysis_2017-06-05_v8_RSEMv1.3.0_transcript_tpm.gct" ]; then
gunzip ${output_dir}/resources/GTEx/GTEx_Analysis_2017-06-05_v8_RSEMv1.3.0_transcript_tpm.gct.gz
fi

local -r annotsv_dir="${output_dir}/resources/annotsv/v3.3.6"

local -r annotsv_human_dir="${annotsv_dir}/Annotations_Human"
Expand Down
4 changes: 4 additions & 0 deletions modules/sample_sheet.nf
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ def parseCommonSampleSheet(csvFilename, additionalCols) {
list: true,
regex: /HP:\d{7}/
],
tissues: [
type: "string",
list: true
],
sequencing_method: [
type: "string",
default: { 'WGS' },
Expand Down
4 changes: 3 additions & 1 deletion modules/vcf/annotate.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
include { basename; areProbandHpoIdsIndentical } from './utils'
include { basename; areProbandHpoIdsIndentical; getTissues } from './utils'

process annotate {
label 'vcf_annotate'
Expand Down Expand Up @@ -32,8 +32,10 @@ process annotate {
capiceModelPath = params.vcf.annotate[assembly].capice_model
alphScorePath = params.vcf.annotate[assembly].vep_plugin_alphscore
strangerCatalog = params.vcf.annotate[assembly].stranger_catalog
gtexFile = params.vcf.annotate.vep_plugin_gtex

areProbandHpoIdsIndentical = areProbandHpoIdsIndentical(meta.project.samples)
tissues = getTissues(meta.project.samples)
gadoScores = meta.gado != null ? meta.gado : ""

template 'annotate.sh'
Expand Down
5 changes: 3 additions & 2 deletions modules/vcf/classify.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
include { basename } from './utils'
include { basename; getTissues } from './utils'

process classify {
label 'vcf_classify'
Expand All @@ -18,7 +18,8 @@ process classify {
metadata = params.vcf.classify.metadata
decisionTree = params.vcf.classify[meta.project.assembly].decision_tree
annotatePath = params.vcf.classify.annotate_path

tissues = getTissues(meta.project.samples)

template 'classify.sh'

stub:
Expand Down
14 changes: 9 additions & 5 deletions modules/vcf/templates/annotate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ vep() {
args+=("--dir_cache" "!{params.vcf.annotate.vep_cache_dir}")
args+=("--species" "homo_sapiens")
args+=("--assembly" "!{assembly}")
args+=("--refseq")
#args+=("--refseq")
args+=("--exclude_predicted")
args+=("--use_given_ref")
args+=("--symbol")
Expand All @@ -160,20 +160,24 @@ vep() {
args+=("--dir_plugins" "!{params.vcf.annotate.vep_plugin_dir}")
args+=("--plugin" "Grantham")
args+=("--plugin" "SpliceAI,snv=!{vepPluginSpliceAiSnvPath},indel=!{vepPluginSpliceAiIndelPath}")
args+=("--plugin" "Capice,${capiceOutputPath}")
args+=("--plugin" "Capice,${capiceOutputPath},!{params.vcf.annotate.ensembl_gene_mapping}")
args+=("--plugin" "UTRannotator,!{vepPluginUtrAnnotatorPath}")
args+=("--custom" "!{vepCustomPhyloPPath},phyloP,bigwig,exact,0")
args+=("--safe")

if [ -n "!{hpoIds}" ]; then
args+=("--plugin" "Hpo,!{params.vcf.annotate.vep_plugin_hpo},!{hpoIds.replace(',', ';')}")
args+=("--plugin" "Hpo,!{params.vcf.annotate.vep_plugin_hpo},!{hpoIds.replace(',', ';')},!{params.vcf.annotate.ensembl_gene_mapping}")
fi
if [ -n "!{gadoScores}" ]; then
args+=("--plugin" "GADO,!{gadoScores},!{params.vcf.annotate.ensembl_gene_mapping}")
fi
args+=("--plugin" "Inheritance,!{params.vcf.annotate.vep_plugin_inheritance}")
if [ -n "!{gtexFile}" ]; then
echo !{tissues}
args+=("--plugin" "GTEx,!{gtexFile},!{tissues.replace(',', ';')}")
fi
args+=("--plugin" "Inheritance,!{params.vcf.annotate.vep_plugin_inheritance},,!{params.vcf.annotate.ensembl_gene_mapping}")
if [ -n "!{vepPluginVkglPath}" ] && [ -n "!{params.vcf.annotate.vep_plugin_vkgl_mode}" ]; then
args+=("--plugin" "VKGL,!{vepPluginVkglPath},!{params.vcf.annotate.vep_plugin_vkgl_mode}")
args+=("--plugin" "VKGL,!{vepPluginVkglPath},!{params.vcf.annotate.vep_plugin_vkgl_mode},!{params.vcf.annotate.ensembl_gene_mapping}")
fi
if [ -n "!{vepPluginGnomAdPath}" ]; then
args+=("--plugin" "gnomAD,!{vepPluginGnomAdPath}")
Expand Down
13 changes: 12 additions & 1 deletion modules/vcf/templates/classify.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ classify () {
args+=("-jar" "/opt/vcf-decision-tree/lib/vcf-decision-tree.jar")
args+=("--input" "!{vcf}")
args+=("--metadata" "!{metadata}")
args+=("--config" "!{decisionTree}")
args+=("--config" "decision_tree_updated.json")
if [ !{annotatePath} -eq 1 ]; then
args+=("--path")
fi
Expand Down Expand Up @@ -42,7 +42,18 @@ insert_alt(){
fi
}

# Writes the selected tissue names to tissues.tsv in the current working
# directory, one name per line.
#
# The comma-separated tissue string is interpolated by Nextflow (!{tissues})
# before this script runs. It is quoted here so the substituted text reaches
# printf as a single argument, unaffected by word splitting, globbing or
# option parsing. An empty tissue string yields a file holding one empty line.
write_tissue_file(){
  printf '%s\n' "!{tissues}" | tr ',' '\n' > tissues.tsv
}

# Creates decision_tree_updated.json in the current working directory: a copy
# of the configured decision tree (!{decisionTree}, interpolated by Nextflow)
# in which every TISSUE_FILE_PATH placeholder is replaced by the absolute
# path of tissues.tsv.
#
# tissues.tsv is resolved relative to the current working directory, so it is
# expected to have been written there before this function is called.
update_tree(){
  # declared separately from the assignment so that a failing realpath is not
  # masked by the exit status of 'local'
  local tissuePath
  tissuePath=$(realpath tissues.tsv)
  # NOTE: the path is inserted verbatim as the sed replacement text; a path
  # containing '|', '&' or a backslash would corrupt the substitution.
  sed "s|TISSUE_FILE_PATH|${tissuePath}|g" "!{decisionTree}" > decision_tree_updated.json
}

main () {
write_tissue_file
update_tree
store_alt
classify
insert_alt
Expand Down
12 changes: 12 additions & 0 deletions modules/vcf/utils.nf
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,18 @@ def determineChunks(meta) {
return chunks
}

/**
 * Resolves the tissue keys of all samples to the full tissue names known to
 * the pipeline.
 *
 * Every non-empty key found in a sample's 'tissues' list is used as a
 * regular-expression fragment: a known tissue name is selected when it matches
 * ".*<key>.*" (case-sensitive). A key such as "Brain" therefore selects every
 * tissue whose name contains "Brain".
 *
 * @param samples collection of sample objects/maps; the 'tissues' property of
 *                each sample is read and may be null or empty
 * @return the selected tissue names joined by ',', without duplicates, ordered
 *         by first matching key and then by position in the known-tissue list;
 *         an empty string when no key matches or no keys are given
 */
def getTissues(samples) {
    // Tissue names recognised by the pipeline (presumably the GTEx tissue labels
    // expected by the GTEx VEP plugin -- confirm against the plugin).
    def allTissues = ["Adipose_Subcutaneous","Adipose_Visceral","AdrenalGland","Artery_Aorta","Artery_Coronary","Artery_Tibial","Bladder","Brain_Amygdala","Brain_Anteriorcingulatecortex","Brain_Caudate","Brain_CerebellarHemisphere","Brain_Cerebellum","Brain_Cortex","Brain_FrontalCortex","Brain_Hippocampus","Brain_Hypothalamus","Brain_Nucleusaccumbens","Brain_Putamen","Brain_Spinalcord","Brain_Substantianigra","Breast_MammaryTissue","Cells_Culturedfibroblasts","Cells_EBV_transformedlymphocytes","Cervix_Ectocervix","Cervix_Endocervix","Colon_Sigmoid","Colon_Transverse","Esophagus_GastroesophagealJunction","Esophagus_Mucosa","Esophagus_Muscularis","FallopianTube","Heart_AtrialAppendage","Heart_LeftVentricle","Kidney_Cortex","Kidney_Medulla","Liver","Lung","MinorSalivaryGland","Muscle_Skeletal","Nerve_Tibial","Ovary","Pancreas","Pituitary","Prostate","Skin_NotSunExposed","Skin_SunExposed","SmallIntestine_TerminalIleum","Spleen","Stomach","Testis","Thyroid","Uterus","Vagina","WholeBlood"]

    // A sample without a 'tissues' value contributes no keys (collectMany would
    // otherwise fail on null). Empty keys are dropped because the pattern ".*.*"
    // would select every known tissue.
    def tissueKeys = samples
        .collectMany { sample -> sample.tissues ?: [] }
        .findAll { key -> key }
        .unique()

    // LinkedHashSet: a tissue matched by several keys is reported once, while
    // the first-seen order is preserved.
    def tissues = new LinkedHashSet<String>()
    for (tissueKey in tissueKeys) {
        String pattern = ".*${tissueKey}.*"
        tissues.addAll(allTissues.findAll { tissue -> tissue.matches(pattern) })
    }
    return tissues.join(",")
}

def scatter(meta) {
def chunks = determineChunks(meta)
def index = 0
Expand Down
Loading