diff --git a/mothur_otu.bat b/mothur_otu.bat
index b2d8e48..1967c6a 100644
--- a/mothur_otu.bat
+++ b/mothur_otu.bat
@@ -82,3 +82,41 @@ cluster(column=final.dist, name=final.names)
 
 # Option 2: quick and dirty
 #cluster.split(fasta=final.fasta, taxonomy=final.taxonomy, name=final.names, taxlevel=3, processors=6)
+
+#The output from Option 2 should be about the same as Option 1. The remainder of this code uses Option 1
+
+#create a table that records how many times each OTU shows up in each sample (a "shared" file)
+#input: final.an.list and final.groups file
+#output: final.an.shared file
+make.shared(list=final.an.list, group=final.groups, label=0.03)
+
+#The final step to getting good OTU data is to normalize the number of sequences in each sample
+#First we need to know how many sequences are in each sample (shared file named explicitly instead of relying on mothur's "current" file)
+count.groups(shared=final.an.shared)
+
+#sub-sample every sample down to the size of the smallest sample (4420 here; update size= from the count.groups output if the data change)
+#input: final.an.shared file
+#output: final.an.unique.subsample.shared (name as noted by the original author; TODO confirm against your mothur version)
+sub.sample(shared=final.an.shared, size=4420)
+
+#get the taxonomy information for each of our OTUs
+#DO NOT HAVE A TAXONOMY FILE
+#classify.otu(list=final.an.list, name=final.names, taxonomy=final.taxonomy, label=0.03)
+
+##Phylotype
+#goes through the taxonomy file and bins sequences together that have the same taxonomy
+#DO NOT HAVE A TAXONOMY FILE
+#phylotype(taxonomy=final.taxonomy, name=final.names, label=1)
+
+#make a shared file and standardize the number of sequences in each group
+#DO NOT HAVE A TAXONOMY FILE (final.tx.list is only produced by the phylotype command, which is disabled above)
+#make.shared(list=final.tx.list, group=final.groups, label=1)
+#sub.sample(shared=final.tx.shared, size=4420)
+
+#get the taxonomy of each phylotype
+#DO NOT HAVE A TAXONOMY FILE
+#classify.otu(list=final.tx.list, name=final.names, taxonomy=final.taxonomy, label=1)
+
+##Phylogenetic tree
+#construct a phylip-formatted distance matrix
+dist.seqs(fasta=final.fasta, output=phylip, processors=2)