Skip to content

Commit

Permalink
Should fix issue 9
Browse files Browse the repository at this point in the history
  • Loading branch information
rwdavies committed Dec 24, 2021
1 parent e706c8a commit 00ac9da
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 19 deletions.
30 changes: 17 additions & 13 deletions QUILT/R/hla_prepare_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ make_and_save_hla_full_alleles_filled_in <- function(
##gives where each allele comes from, which is reference allele, and strand

##print(hla_region)

if (hla_region %in% supplementary_gene_info[, "gene"]) {

print_message(paste0("Processing HLA-", hla_region))
Expand All @@ -288,7 +289,6 @@ make_and_save_hla_full_alleles_filled_in <- function(
temp=grep("Please",this)
this=this[1:(temp-1)]


starts=grep("gDNA",this)
ll=getseqs(
starts[1]+2,
Expand All @@ -298,8 +298,8 @@ make_and_save_hla_full_alleles_filled_in <- function(
)
starts=c(starts,length(this)+2)

###amount to trim for first codon
####offset=as.double(this[starts[1]+1])
## amount to trim for first codon
## offset=as.double(this[starts[1]+1])

for(k in 2:(length(starts)-1)) {
ll=paste(ll,getseqs(starts[k]+2,starts[k+1]-1,paste(hla_region,"[*]",sep=""), this = this),sep="")
Expand All @@ -308,19 +308,19 @@ make_and_save_hla_full_alleles_filled_in <- function(
names(ll)=getnames(starts[1]+2,starts[2]-1,paste(hla_region,"[*]",sep=""), this = this)

first_row <- which(names(ll) == matches[, "allele"])
##if (first_row != matches[, "first_row"]) {
## stop("Error in lining up")
## }
## if (first_row != matches[, "first_row"]) {
## stop("Error in lining up")
## }

##print("...done")

#######find a match
## find a match

temp=matrix(nrow=length(ll),ncol=nchar(ll[1]))

for(i in 1:ncol(temp)) temp[,i]=substring(ll,i,i)
for(i in 1:ncol(temp)) temp[temp[,i]=="-",i]=temp[1,i]
#####need to alter
##need to alter

##for(i in 1:ncol(temp)) temp[temp[,i]=="*",i]=temp[1,i]

Expand Down Expand Up @@ -392,25 +392,29 @@ make_and_save_hla_full_alleles_filled_in <- function(
temp=temp[,ourpos>=qq[1] & ourpos<=qq[2]]
ourpos=ourpos[ourpos>=qq[1] & ourpos<=qq[2]]

####fill in
##fill in

####space separated
##space separated
oldvalue=nrow(temp)*ncol(temp)
newvalue=sum(temp=="*")

while(oldvalue>newvalue){

hapstore=haps
tempstore=temp
###guess missing values

##guess missing values
##print("Total missing still:")
##print(sum(temp=="*"))
##print("Total columns: ")
##print(ncol(haps))
##print("Processing column to find nearest neighbour:")

for(i in 1:ncol(haps)){

if(sum(temp[i,]=="*")){

## if(!i%%20) {print(c(i,sum(temp[i,]=="*")))}
## if(!i%%20) {print(c(i,sum(temp[i,]=="*")))}
####use haplotypes to find match
cond2=(temp[i,]!="*")
cond=haps[,i]!=-1
Expand All @@ -428,7 +432,7 @@ make_and_save_hla_full_alleles_filled_in <- function(
rownames(allelemat)=c("A","C","G","T","-","*")
for(k in 1:nrow(allelemat)) allelemat[k,]=colSums(newalleles==rownames(allelemat)[k])
best=1:ncol(allelemat);for(k in 1:ncol(allelemat)) best[k]=rownames(allelemat)[1:5][which(allelemat[1:5,k]==max(allelemat[1:5,k]))[1]]
best[colSums(allelemat[1:5,])==0]="*"
best[colSums(allelemat[1:5,, drop = FALSE])==0]="*"
temp[i,!cond2]=best


Expand Down
1 change: 1 addition & 0 deletions QUILT/R/quilt-hla-prepare-reference.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ QUILT_HLA_prepare_reference <- function(
nCores = 1
) {


x <- as.list(environment())
command_line <- paste0(
"QUILT_HLA_prepare_reference(",
Expand Down
18 changes: 12 additions & 6 deletions example/QUILT_hla_reference_panel_construction.Md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ Here code to generate the provided reference panel data package is provided and
Where you want to store the files as you work on them, and where the output files should go

```
inputs_dir=/data/smew1/rdavies/quilt_hla_2021_09_18/
test_dir=/data/smew1/rdavies/quilt_hla_2021_09_18/
reference_package_dir=/data/smew1/rdavies/quilt_hla_reference_panel_files_2021_09_18/
inputs_dir=/data/smew1/rdavies/quilt_hla_2021_12_24_3430/
test_dir=/data/smew1/rdavies/quilt_hla_2021_12_24_3430/
reference_package_dir=/data/smew1/rdavies/quilt_hla_2021_12_24_3430/quilt_hla_reference_panel_files/
mkdir -p ${test_dir}
mkdir -p ${inputs_dir}
mkdir -p ${reference_package_dir}
Expand All @@ -36,9 +36,15 @@ sed -i 's/sample population group sex/SAMPLE POP GROUP SEX/g' ${reference_sample
Here we download the IPD-IGMT data
```
## To download IPD-IGMT version 3.39, for example
ipdigmt_link=https://github.com/ANHIG/IMGTHLA/blob/032815608e6312b595b4aaf9904d5b4c189dd6dc/Alignments_Rel_3390.zip?raw=true
## To download IPD-IGMT version 3.43
ipdigmt_link=https://github.com/ANHIG/IMGTHLA/blob/3430/Alignments_Rel_3430.zip?raw=true
cd ${test_dir}
wget https://github.com/ANHIG/IMGTHLA/blob/032815608e6312b595b4aaf9904d5b4c189dd6dc/Alignments_Rel_3390.zip?raw=true
mv Alignments_Rel_3390.zip?raw=true Alignments_Rel_3390.zip
wget ${ipdigmt_link}
ipdigmt_filename=`basename ${ipdigmt_link} | sed 's/?raw=true//g'`
mv Alignments_Rel_3430.zip?raw=true ${ipdigmt_filename}
```

Here we download the database of HLA alleles
Expand Down Expand Up @@ -87,7 +93,7 @@ cd ~/proj/QUILT/ ## change to the directory where you've cloned QUILT
--nGen=100 \
--hla_gene_region_file=hla_ancillary_files/hlagenes.txt \
--hla_types_panel=${test_dir}/20181129_HLA_types_full_1000_Genomes_Project_panel.txt \
--ipd_igmt_alignments_zip_file=${test_dir}Alignments_Rel_3390.zip \
--ipd_igmt_alignments_zip_file=${test_dir}${ipdigmt_filename} \
--quilt_hla_supplementary_info_file=hla_ancillary_files/quilt_hla_supplementary_info.txt \
--full_regionStart=25587319 \
--full_regionEnd=33629686 \
Expand Down

0 comments on commit 00ac9da

Please sign in to comment.