Skip to content

Commit

Permalink
Merge pull request #3 from stjude/madetunj
Browse files Browse the repository at this point in the history
converted from python2 to python3
  • Loading branch information
madetunj authored Jan 10, 2020
2 parents c14e6a4 + 49cef09 commit cb4ce74
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 120 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ CLONED using SOURCETREE from: https://bitbucket.org/young_computation/rose/src/m
* samtools
* R version > 3.4
* bedtools > 2
* python2
* python3

3) USAGE

Expand Down
36 changes: 18 additions & 18 deletions bin/ROSE_bamToGFF.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
#bamToGFF.py

#script to grab reads from a bam that align to a .gff file
Expand All @@ -12,7 +12,7 @@

import os

from string import join,upper,maketrans
import string



Expand Down Expand Up @@ -41,16 +41,16 @@ def mapBamToGFF(bamFile,gff,sense = 'both',extension = 200,floor = 0,rpm = False
else:
MMR = 1

print('using a MMR value of %s' % (MMR))
print(('using a MMR value of %s' % (MMR)))

senseTrans = maketrans('-+.','+-+')
#senseTrans = maketrans('-+.','+-+') #deprecated

if ROSE_utils.checkChrStatus(bamFile) == 1:
print "has chr"
print("has chr")
hasChrFlag = 1
#sys.exit();
else:
print "does not have chr"
print("does not have chr")
hasChrFlag = 0
#sys.exit()

Expand All @@ -67,10 +67,10 @@ def mapBamToGFF(bamFile,gff,sense = 'both',extension = 200,floor = 0,rpm = False
for line in gff:
line = line[0:9]
if ticker%100 == 0:
print ticker
print(ticker)
ticker+=1
if not hasChrFlag:
line[0] = re.sub(r"chr",r"",line[0])
line[0] = re.sub(r"chr",r"",line[0])
gffLocus = ROSE_utils.Locus(line[0],int(line[3]),int(line[4]),line[6],line[1])
#print line[0]
#sys.exit()
Expand All @@ -86,11 +86,11 @@ def mapBamToGFF(bamFile,gff,sense = 'both',extension = 200,floor = 0,rpm = False
locus = ROSE_utils.Locus(locus.chr(),locus.start()-extension,locus.end(),locus.sense(),locus.ID())
extendedReads.append(locus)
if gffLocus.sense() == '+' or gffLocus.sense == '.':
senseReads = filter(lambda x:x.sense() == '+' or x.sense() == '.',extendedReads)
antiReads = filter(lambda x:x.sense() == '-',extendedReads)
senseReads = [x for x in extendedReads if x.sense() == '+' or x.sense() == '.']
antiReads = [x for x in extendedReads if x.sense() == '-']
else:
senseReads = filter(lambda x:x.sense() == '-' or x.sense() == '.',extendedReads)
antiReads = filter(lambda x:x.sense() == '+',extendedReads)
senseReads = [x for x in extendedReads if x.sense() == '-' or x.sense() == '.']
antiReads = [x for x in extendedReads if x.sense() == '+']

senseHash = defaultdict(int)
antiHash = defaultdict(int)
Expand All @@ -107,12 +107,12 @@ def mapBamToGFF(bamFile,gff,sense = 'both',extension = 200,floor = 0,rpm = False
antiHash[x]+=1

#now apply flooring and filtering for coordinates
keys = ROSE_utils.uniquify(senseHash.keys()+antiHash.keys())
keys = ROSE_utils.uniquify(list(senseHash.keys())+list(antiHash.keys()))
if floor > 0:

keys = filter(lambda x: (senseHash[x]+antiHash[x]) > floor,keys)
keys = [x for x in keys if (senseHash[x]+antiHash[x]) > floor]
#coordinate filtering
keys = filter(lambda x: gffLocus.start() < x < gffLocus.end(),keys)
keys = [x for x in keys if gffLocus.start() < x < gffLocus.end()]


#setting up the output table
Expand All @@ -131,15 +131,15 @@ def mapBamToGFF(bamFile,gff,sense = 'both',extension = 200,floor = 0,rpm = False

while n <nBins:
n+=1
binKeys = filter(lambda x: i < x < i+binSize,keys)
binKeys = [x for x in keys if i < x < i+binSize]
binDen = float(sum([senseHash[x]+antiHash[x] for x in binKeys]))/binSize
clusterLine+=[round(binDen/MMR,4)]
i = i+binSize
else:
i = gffLocus.end()
while n < nBins:
n+=1
binKeys = filter(lambda x: i-binSize < x < i,keys)
binKeys = [x for x in keys if i-binSize < x < i]
binDen = float(sum([senseHash[x]+antiHash[x] for x in binKeys]))/binSize
clusterLine+=[round(binDen/MMR,4)]
i = i-binSize
Expand Down Expand Up @@ -192,7 +192,7 @@ def main():
bamFile = options.bam
fullPath = os.path.abspath(bamFile)
bamName = fullPath.split('/')[-1].split('.')[0]
pathFolder = join(fullPath.split('/')[0:-1],'/')
pathFolder = '/'.join(fullPath.split('/')[0:-1])
fileList = os.listdir(pathFolder)
hasBai = False
for fileName in fileList:
Expand Down
4 changes: 2 additions & 2 deletions bin/ROSE_callSuper.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,8 @@ if(wceName == 'NONE'){
}else{
plot(length(rankBy_vector):1,rankBy_vector[signalOrder], col='red',xlab=paste(rankBy_factor,'_enhancers'),ylab=paste(rankBy_factor,' Signal','- ',wceName),pch=19,cex=2)
}
abline(h=cutoff_options$absolute,color='grey',lty=2)
abline(v=length(rankBy_vector)-length(superEnhancerRows),color='grey',lty=2)
abline(h=cutoff_options$absolute,col='grey',lty=2)
abline(v=length(rankBy_vector)-length(superEnhancerRows),col='grey',lty=2)
lines(length(rankBy_vector):1,rankBy_vector[signalOrder],lwd=4, col='red')
text(0,0.8*max(rankBy_vector),paste(' Cutoff used: ',cutoff_options$absolute,'\n','Super-Enhancers identified: ',length(superEnhancerRows)),pos=4)

Expand Down
35 changes: 16 additions & 19 deletions bin/ROSE_geneMapper.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
#130428

#ROSE_geneMapper.py
Expand All @@ -10,14 +10,11 @@

import sys



import ROSE_utils


import os

from string import upper,join
import string

from collections import defaultdict

Expand All @@ -33,9 +30,9 @@ def mapEnhancerToGene(annotFile,enhancerFile,transcribedFile='',uniqueGenes=True
'''
maps genes to enhancers. if uniqueGenes, reduces to gene name only. Otherwise, gives for each refseq
'''
print "Herp"
print("Herp")
startDict = ROSE_utils.makeStartDict(annotFile)
print "Derp"
print("Derp")
enhancerTable = ROSE_utils.parseTable(enhancerFile,'\t')


Expand All @@ -46,7 +43,7 @@ def mapEnhancerToGene(annotFile,enhancerFile,transcribedFile='',uniqueGenes=True
transcribedTable = ROSE_utils.parseTable(transcribedFile,'\t')
transcribedGenes = [line[1] for line in transcribedTable]
else:
transcribedGenes = startDict.keys()
transcribedGenes = list(startDict.keys())

print('MAKING TRANSCRIPT COLLECTION')
transcribedCollection = ROSE_utils.makeTranscriptCollection(annotFile,0,0,500,transcribedGenes)
Expand Down Expand Up @@ -132,24 +129,24 @@ def mapEnhancerToGene(annotFile,enhancerFile,transcribedFile='',uniqueGenes=True
#print distList.index(min(distList))
#print min(distList)
#print len(distList)
#print len(allEnhancerGenes[distList.index(min(distList))])
#print line
#print len(startDict[allEnhancerGenes[distList.index(min(distList))]])
#print len(allEnhancerGenes[distList.index(min(distList))])
#print line
#print len(startDict[allEnhancerGenes[distList.index(min(distList))]])
closestGene = startDict[allEnhancerGenes[distList.index(min(distList))]]['name']

#NOW WRITE THE ROW FOR THE ENHANCER TABLE
newEnhancerLine = line[0:6]
if byRefseq:
newEnhancerLine.append(join(ROSE_utils.uniquify([x for x in overlappingGenes]),','))
newEnhancerLine.append(join(ROSE_utils.uniquify([x for x in proximalGenes]),','))
newEnhancerLine.append(','.join(ROSE_utils.uniquify([x for x in overlappingGenes])))
newEnhancerLine.append(','.join(ROSE_utils.uniquify([x for x in proximalGenes])))
#print newEnhancerLine
#print len(allEnhancerGenes)
#print distList
closestGene = allEnhancerGenes[distList.index(min(distList))]
newEnhancerLine.append(closestGene)
else:
newEnhancerLine.append(join(ROSE_utils.uniquify([startDict[x]['name'] for x in overlappingGenes]),','))
newEnhancerLine.append(join(ROSE_utils.uniquify([startDict[x]['name'] for x in proximalGenes]),','))
newEnhancerLine.append(','.join(ROSE_utils.uniquify([startDict[x]['name'] for x in overlappingGenes])))
newEnhancerLine.append(','.join(ROSE_utils.uniquify([startDict[x]['name'] for x in proximalGenes])))
closestGene = startDict[allEnhancerGenes[distList.index(min(distList))]]['name']
newEnhancerLine.append(closestGene)

Expand Down Expand Up @@ -187,7 +184,7 @@ def mapEnhancerToGene(annotFile,enhancerFile,transcribedFile='',uniqueGenes=True
proxEnhancers = geneDict['proximal'][refID] + geneDict['overlapping'][refID]


newLine = [geneName,refID,join(proxEnhancers,',')]
newLine = [geneName,refID,','.join(proxEnhancers)]
geneToEnhancerTable.append(newLine)

#re-sort enhancerToGeneTable
Expand Down Expand Up @@ -245,12 +242,12 @@ def main():
if options.out:
outFolder = ROSE_utils.formatFolder(options.out,True)
else:
outFolder = join(enhancerFile.split('/')[0:-1],'/') + '/'
outFolder = '/'.join(enhancerFile.split('/')[0:-1]) + '/'


#GETTING THE GENOME
genome = options.genome
print('USING %s AS THE GENOME' % genome)
print(('USING %s AS THE GENOME' % genome))


#GETTING THE CORRECT ANNOT FILE
Expand All @@ -263,7 +260,7 @@ def main():
'MM10':'%s/annotation/mm10_refseq.ucsc' % (cwd),
}

annotFile = genomeDict[upper(genome)]
annotFile = genomeDict[genome.upper()]

#GETTING THE TRANSCRIBED LIST
if options.geneList:
Expand Down
40 changes: 20 additions & 20 deletions bin/ROSE_main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3

#mapEnhancerFromFactor.py
'''
Expand All @@ -19,7 +19,7 @@

import os

from string import upper,join
import string

from collections import defaultdict

Expand All @@ -46,7 +46,7 @@ def regionStitching(inputGFF,stitchWindow,tssWindow,annotFile,removeTSS=True):
#this loop makes a locus centered around +/- tssWindow of transcribed genes
#then adds it to the list tssLoci
tssLoci = []
for geneID in startDict.keys():
for geneID in list(startDict.keys()):
tssLoci.append(ROSE_utils.makeTSSLocus(geneID,startDict,tssWindow,tssWindow))


Expand All @@ -67,7 +67,7 @@ def regionStitching(inputGFF,stitchWindow,tssWindow,annotFile,removeTSS=True):
boundCollection.remove(locus)
debugOutput.append([locus.__str__(),locus.ID(),'CONTAINED'])
removeTicker+=1
print('REMOVED %s LOCI BECAUSE THEY WERE CONTAINED BY A TSS' % (removeTicker))
print(('REMOVED %s LOCI BECAUSE THEY WERE CONTAINED BY A TSS' % (removeTicker)))

#boundCollection is now all enriched region loci that don't overlap an active TSS
stitchedCollection = boundCollection.stitchCollection(stitchWindow,'both')
Expand All @@ -77,7 +77,7 @@ def regionStitching(inputGFF,stitchWindow,tssWindow,annotFile,removeTSS=True):
#with the original loci that were there
fixedLoci = []
tssLoci = []
for geneID in startDict.keys():
for geneID in list(startDict.keys()):
tssLoci.append(ROSE_utils.makeTSSLocus(geneID,startDict,50,50))


Expand All @@ -101,8 +101,8 @@ def regionStitching(inputGFF,stitchWindow,tssWindow,annotFile,removeTSS=True):
else:
fixedLoci.append(stitchedLocus)

print('REMOVED %s STITCHED LOCI BECAUSE THEY OVERLAPPED MULTIPLE TSSs' % (removeTicker))
print('ADDED BACK %s ORIGINAL LOCI' % (originalTicker))
print(('REMOVED %s STITCHED LOCI BECAUSE THEY OVERLAPPED MULTIPLE TSSs' % (removeTicker)))
print(('ADDED BACK %s ORIGINAL LOCI' % (originalTicker)))
fixedCollection = ROSE_utils.LocusCollection(fixedLoci,50)
return fixedCollection,debugOutput
else:
Expand Down Expand Up @@ -163,16 +163,16 @@ def mapCollection(stitchedCollection,referenceCollection,bamFileList,mappedFolde

bamFileName = bamFile.split('/')[-1]

print('GETTING MAPPING DATA FOR %s' % bamFile)
print(('GETTING MAPPING DATA FOR %s' % bamFile))
#assumes standard convention for naming enriched region gffs

#opening up the mapped GFF
print('OPENING %s%s_%s_MAPPED.gff' % (mappedFolder,refName,bamFileName))
print(('OPENING %s%s_%s_MAPPED.gff' % (mappedFolder,refName,bamFileName)))

mappedGFF =ROSE_utils.parseTable('%s%s_%s_MAPPED.gff' % (mappedFolder,refName,bamFileName),'\t')

signalDict = defaultdict(float)
print('MAKING SIGNAL DICT FOR %s' % (bamFile))
print(('MAKING SIGNAL DICT FOR %s' % (bamFile)))
mappedLoci = []
for line in mappedGFF[1:]:

Expand Down Expand Up @@ -268,13 +268,13 @@ def main():
ROSE_utils.bedToGFF(options.input,inputGFFFile)
elif options.input.split('.')[-1] =='gff':
#COPY THE INPUT GFF TO THE GFF FOLDER
inputGFFFile = options.input
inputGFFFile = options.input
os.system('cp %s %s' % (inputGFFFile,gffFolder))

else:
print('WARNING: INPUT FILE DOES NOT END IN .gff or .bed. ASSUMING .gff FILE FORMAT')
#COPY THE INPUT GFF TO THE GFF FOLDER
inputGFFFile = options.input
inputGFFFile = options.input
os.system('cp %s %s' % (inputGFFFile,gffFolder))


Expand All @@ -301,13 +301,13 @@ def main():
removeTSS = False

#GETTING THE BOUND REGION FILE USED TO DEFINE ENHANCERS
print('USING %s AS THE INPUT GFF' % (inputGFFFile))
print(('USING %s AS THE INPUT GFF' % (inputGFFFile)))
inputName = inputGFFFile.split('/')[-1].split('.')[0]


#GETTING THE GENOME
genome = options.genome
print('USING %s AS THE GENOME' % genome)
print(('USING %s AS THE GENOME' % genome))


#GETTING THE CORRECT ANNOT FILE
Expand All @@ -320,7 +320,7 @@ def main():
'MM10':'%s/annotation/mm10_refseq.ucsc' % (cwd),
}

annotFile = genomeDict[upper(genome)]
annotFile = genomeDict[genome.upper()]

#MAKING THE START DICT
print('MAKING START DICT')
Expand Down Expand Up @@ -354,19 +354,19 @@ def main():
#WRITING DEBUG OUTPUT TO DISK

if debug:
print('WRITING DEBUG OUTPUT TO DISK AS %s' % (debugOutFile))
print(('WRITING DEBUG OUTPUT TO DISK AS %s' % (debugOutFile)))
ROSE_utils.unParseTable(debugOutput,debugOutFile,'\t')

#WRITE THE GFF TO DISK
print('WRITING STITCHED GFF TO DISK AS %s' % (stitchedGFFFile))
print(('WRITING STITCHED GFF TO DISK AS %s' % (stitchedGFFFile)))
ROSE_utils.unParseTable(stitchedGFF,stitchedGFFFile,'\t')



#SETTING UP THE OVERALL OUTPUT FILE
outputFile1 = outFolder + stitchedGFFName + '_ENHANCER_REGION_MAP.txt'

print('OUTPUT WILL BE WRITTEN TO %s' % (outputFile1))
print(('OUTPUT WILL BE WRITTEN TO %s' % (outputFile1)))

#MAPPING TO THE NON STITCHED (ORIGINAL GFF)
#MAPPING TO THE STITCHED GFF
Expand Down Expand Up @@ -413,7 +413,7 @@ def main():
'''
outputDone = True
if ticker%6 == 0:
print(ticker*5)
print((ticker*5))
ticker +=1
#CHANGE THIS PARAMETER TO ALLOW MORE TIME TO MAP
if ticker == 144:
Expand Down Expand Up @@ -442,7 +442,7 @@ def main():
if outputDone == True:
break
time.sleep(300)
print('MAPPING TOOK %s MINUTES' % (ticker*5))
print(('MAPPING TOOK %s MINUTES' % (ticker*5)))

print('BAM MAPPING COMPLETED NOW MAPPING DATA TO REGIONS')
#CALCULATE DENSITY BY REGION
Expand Down
Loading

0 comments on commit cb4ce74

Please sign in to comment.