Skip to content

Commit

Permalink
Adds script biom_phyla_summary.py
Browse files Browse the repository at this point in the history
Takes in a BIOM abundance table and prints out a summary of the percent
abundance of the top 20 Families, Genera, and Species.

Also currently lists the percent abundance of important species in the oral
cavity (color complexes). This will be made an option in a later release.
Along with a more generic ability to pass in a list of any phylogenetic level
or specific OTU.
  • Loading branch information
Shareef Dabdoub committed Sep 16, 2014
1 parent 0fa6e01 commit 803ad22
Showing 1 changed file with 79 additions and 0 deletions.
79 changes: 79 additions & 0 deletions bin/biom_phyla_summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import sys
import json
from collections import defaultdict

def biom_summary(biom):
tamtcounts = defaultdict(int)
tot_seqs = 0.0

for row, col, amt in biom['data']:
tot_seqs += amt
rtax = biom['rows'][row]['metadata']['taxonomy']
for i,t in enumerate(rtax):
t = t.strip()
if i == len(rtax)-1 and len(t) > 3 and len(rtax[-1]) > 3:
t = 's__'+rtax[i-1].strip().split('_')[-1]+'_'+t.split('_')[-1]
tamtcounts[t] += amt

print 'Total sequence count: %i\n' % tot_seqs

def levelData(counts, seqCount, level='g'):
lvlData = sorted([(k, counts[k]) for k in counts if level+'_' in k], key=lambda x: x[1], reverse=True)
return ['{}: {:4.2f}%'.format(p[0], p[1]/seqCount*100) for p in lvlData]

def lookup(s, d):
for idx, item in enumerate(d):
if s in item:
return d[idx]
return s+': None'

print 'Top families:\n'
lvlDataF = levelData(tamtcounts, tot_seqs, 'f')
print lvlDataF[:20]

print '\nTop genera:\n'
lvlDataG = levelData(tamtcounts, tot_seqs)
print lvlDataG[:20]

print '\nTop species:\n'
lvlData = levelData(tamtcounts, tot_seqs, 's')
print lvlData[:20]


print '\nRed Complex:'
print lookup('s__Porphyromonas_gingivalis', lvlData)
print lookup('s__Treponema_denticola', lvlData)
print lookup('s__Tannerella_forsythia', lvlData)

print '\nOrange Complex: '
print lookup('s__Fusobacterium_nucleatum', lvlData)
print lookup('s__Prevotella_intermedia', lvlData)
print lookup('s__Prevotella_nigrescens', lvlData)
print lookup('s__Peptostreptococcus_micros', lvlData)

print '\nYellow Complex: '
print lookup('s__Streptococcus_sanguis', lvlData)
print lookup('s__Streptococcus_oralis', lvlData)
print lookup('s__Streptococcus_mitis', lvlData)
print lookup('s__Streptococcus_gordonii', lvlData)
print lookup('s__Streptococcus_intermedius', lvlData)

print '\nGreen Complex: '
print lookup('s__Aggregatibacter_actinomycetemcomitans', lvlData)
print lookup('s__Campylobacter_concisus', lvlData)
print lookup('s__Eikenella_corrodens', lvlData)
print lookup('g__Capnocytophaga', lvlDataG)

print '\nPurple Complex: '
print lookup('s__Veillonella_parvula', lvlData)
print lookup('s__Actinomyces_odontolyticus', lvlData)
print lookup('s__Selenomonas_noxia', lvlData)
print lookup('s__Actinomyces_naeslundii', lvlData)

def main():
with open(sys.argv[1], 'rU') as bF:
biom = json.loads(bF.readline())
biom_summary(biom)

if __name__ == '__main__':
main()

0 comments on commit 803ad22

Please sign in to comment.