-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpatternmatching.py
25 lines (23 loc) · 9.84 KB
/
patternmatching.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
__author__ = 'ssen'
def patternmatching(pattern, genome):
indexMatch = []
lenPattern = len(pattern)
lenGenome = len(genome)
count = 0
for i in range(0, lenGenome-lenPattern):
if (pattern == genome[i:i+lenPattern]):
count = count + 1
indexMatch.append(i)
return indexMatch
#print patternmatching('ATAT','GATATATGCATATACTT' )
#f=open('C:/vibrio_cholerae.txt')
#s=f.read()
a=""
indicesCombined = patternmatching('GCGCATCGC','GCGCATCAGCGCATCGCGCATCACACGCAGCGCATCAGTGCGCATCTGCGCATCTGCGCATCGGAAGGCGCATCGGCGCATCTTGTGCGCATCGCGCATCTAGCGCATCTGCGCATCGTCCTGCGCATCAGGGTAGCGCATCGCGCATCGGCGCATCGCGCATCGCGCATCCCGCGCATCTGCGAACACAGCGCATCGTAATGCGCATCGCGCATCCGGAGCGCATCGCGCATCGCGCATCAAACTACCTTTGCGCATCTCCCATGCGCATCTTCGGGCGCATCGCGCATCGCGCATCTGCGCGCATCACATGCGCATCTCGCGCATCGCAAGCGCATCAGCGCATCGCGCATCCGCTACAGCGCATCTGGCGCATCGCGCATCATTCAAGCGAGGCGCATCGGCGCATCGCGCATCGCGCATCGCGCATCAGTTCCGCGCATCGCGCATCATTCGGTTGCGCATCGCGCATCAAGCGCATCCGACCGCGCGTTGCACCGCGCATCGCAACTACTGCGCATCGCGCATCTCACGGGTTGCGCATCGCGCATCTAGCGCATCGGGCGCATCGCGCATCTCTGCGCATCAACCGAGCGCATCGGCGCATCGCGCATCAGCGCATCATGCGCATCGGCGCGCATCCGCGCATCTGCGCATCACGCGCATCGCGCATCTAGCGTCGCGCATCGCGCATCGCGCATCAAGCGCATCGCGCATCGCGCATCAAGGCGTACGCGCATCCGCGCATCGCGCATCGCGCATCGTGCGCATCATAGCGCATCGCGCATCCAGAATAGGCGCATCAGCGCATCGTGCGCATCGGCGCATCGCGCATCAGGCGCATCGGCGCATCCGATCGCGCATCTGCGATGCGCATCGGCGCATCACGTTTGCGCATCGCGCATCCGCGCATCACGGCGCATCCCGCGCATCGCGCATCCTAGCGCATCCGAGCGCATCGAGGACGCGCATCGAGGCGCATCTGCGCATCGCGCATCTCGCGCATCGCGCATCTTCAGCGCATCTGCGCATCGCGGCGCATCGATTTTGCGCATCCCGAATGCGCATCGCGCATCTGCGCATCGGGCGCATCTCGCGCATCGCGCATCGCGCATCGGCGCATCGCGCATCGGGAGGGCGCATCGCGCATCGCGCATCAAGCGCATCCGCGCATCTGGCGCATCCCTGCGCATCAGCGCATCTTGCGCATCGCGCATCGGCGCGCATCAGCGCATCCTGGCGCATCAGCGCATCGCGCATCGAGCGCATCTAGCGCATCCCTGGCGCATCGCGCATCGCGCATCGAGCGCATCCTCCCGCGCATCCATGCGCATCGCGCATCGGCGCATCCGCGCATCGATGTGGCGCATCCAGCGCATCGCGATAAGCGCATCGGGCGCATCGATCAGCGCATCCTTGCGCATCGCGCATCATAAAGCGCATCGGGTGCGCATCAGGCGCATCTTGTCGCGCATCGCGCATCTGCGCATCGAGGAGCGCATCAGGCGCATCATGCGCATCGCGCATCGCGCATCCAATCGCGCATCCAGGACGGCGCATCCGCGCATCATGGCGCATCCGCGCATCAATGGCGCATCAGCGCATCGGAGCCGCGCATCTCAACGTAGCGCATCGGGCGCATCTTACCGACAATAGCGCATCTGGGCGCATCAGCGCATCTGGCGCATCTGGTTCAGGTTCCCCCGCGCATCCGTGCGCATCATGCGCATCAAGCGCATCCTTGAGGCGCATCGCGCATCCGCGCATCGCGCATCCTCGCGCATCGCGCATCATGGCGCATCATGCGCATCATGCGTTGCGCATCATGCGCATCAAGGGCGCATCCGGGGCGCATCGCGCATCGCAATCTGGCGCATCCTGCGCATCGCCGCGCATCGCGCATCCTATAGCGCATCCCAAGCGCATCATCGTGTAGTGGCAGCGCATCGCGCATCAGCGCATCGCGCATCTCTTTGGCGCATCGCGCATCAGCGCATCGACACTTAGCGCATCGCGCATCCATAATGCGCATCCAGCGGAAGGATTGCGCATCGATCTGCGCATCGCGCATCGCGCATCAGCGCATCGCGCATCAACGAACAATCAACTTTAGTATAAGCGCATCAGCGCATCGCGGCGCATCAGAGCGCATCGCGCATCGCGCATCGGCGCATCGAGCGCATCGCGCATCAGGCGCATCGGAACCGCGCATCGCGCATCTGCGCATCGGGCGCATCGCGCGCATCTCCTACCCATGCGCATCCGCGCATCAGCGTACGCGCATCGGCCGGTTTTGGCGCATCAGGGCGCGCATCGGCGCATCATTTTGCGCGCATCCCCCGGATTGCGCGCATCGGCGCGCATCAGGATTTAGCGCATCGGCGCATCTAGCGCATCTGTGCGCATCGCGCATCGGCGCATCTTCGCGCATCTCGGGCGCATCGCCGCTCCGCGCATCTGCGCATCGCGCATCCAGCGCATCTCGCGCATCTTATGCAGCCAACGCGCATCTCATCGCGCATCTCGCGCATCCTCACGCGCATCGGTGAAGGCGCATCGCGCATCGAAAGCGCATCTTTGCGCATCTGCGCATCGTCTGCGCATCGCGCATCTGAGAGGGGCGCATCCAGTAACCGCGCATCTTGCGCATCAGAACGCGCATCGCGCATCTGCGCATCATCATTTTGCGCATCGACCGCGCATCAAGTGCGCATCGCGCATCGCGCATCCCGCGCATCCCGCGCATCTTCGCGCATCCGGCGCATCTGCGCATCACGCGCATCAGTGCGCATCGGCGCATCAGCGCATCGCGCATCTAGCGCATCGCGCATCATTCTTAAGCGCATCTAAGCGCATCACTCGGCGCGCATCGCGCATCTGCGTTGCAGCGCATCCTTAAGGCCAAGCTTTCGCGCATCAGGGCGCATCACGCGCATCGCGCATCCGCGCATCGGATGGCGCATCACATGGACACGGCGCATCTGGAGCATCTAAGCGCATCGCGCATCCCCAGCGCATCGTGTAGCGCATCTTGGTCACTTGCTCGGCGCATCGCGCATCCGCGCATCGGCGCATCAGGAGCGCATCTAGGCTATTCAGGTATCCAGCGCATCCGGCGCATCCGCGCATCCAAGGGCGCATCAAGCGCATCGAGCGCATCAGGCGCATCTAAATTAGGTAGTAGCGCGCATCGCGCATCAGCGCATCGCGCATCTAGGGACCCGTGGCGCATCCGCGCATCTGCGCATCAGGGCTTAATGAACTCGGATCGACGCGCGCATCGCGCATCTCAGCGCATCCGCGCATCACGGGCAGCGCATCGGCGCATCGCCGGCGCATCGATGCGCATCGCGCATCCGGCGCATCCGATGCGCATCCCGCGCATCAAGCGCATCGCGCATCGGGCGCATCGCGCATCGCGCATCGGCGCATCGTAGCGCATCTGCGCATCGGCGCATCGCGCATCACCGCGCATCCGCGCATCGCGAACGTCGCGCATCGTGCGCATCCGAGCGCATCCGGAGCGCATCGACTCGATGCGCATCGGGAGAACGCGCATCCCGTGTGACCTCGCGCATCGCGCATCGGATGCGCATCTAGGAGCGCATCTGCGCATCGGACTACGAGCGCATCGCGCATCCACAGCGAAGGGCGCATCCAGCGCATCATCTATGCGCATCGCGGCGCATCGCGCATCGCGCATCGCGCATCCCCGCGCATCAAGGGGGCGCATCCGCGCATCAGGCGCATCGCGCGCATCGCGCATCGATGTGACCGCGCATCCAGCGCATCAAGCGCATCATCGCTCTTGCATTAATCCAACAATGCGCATCCAGCGCATCATGTCGCTTGTATACGAGCGCATCTTCCATGCGCATCGGTGCGCATCTACGCGCATCATAACGCGCATCCGAAGCCTGCGCATCCACATGCGCATCGCGCATCCAGGCGCATCAGCGCATCTCTAGCTGCGCATCTGGCGCATCGCGCATCGCGCATCGTTCGCGCATCGTGCTGCGCATCGCGCATCGCGCATCGCGCATCCCGCGCATCCGCGCATCTATGCGCATCTGCGCATCAGCGCATCAGCGCATCAGCGCATCGCGCATCTGCGCATCGCGCATCATACGCGCATCAGGCGCATCGGCGCATCGGCGCATCTCGCGCATCGGCGCATCGCGCATCCTCTGGCTGCGCATCTCTGCGCATCGGGCGCATCGCGCATCTGCGCATCGCGCATCGCGCATCTGGGCGCATCACACTGTGGCCGCGCATCCGTGACGCGCATCGCGCATCCGTTGGGACGCGCATCCCGGCGCATCAGCGCATCGCGCATCTTCGCCTATTATGCGCATCGCGCATCGCGCATCGGTGCGCATCGGCCAATCCCGCGCATCCGCTTCCAGCGCATCGCGCATCGCGCATCGCGCATCTGCTGGAGCGCATCGACGTGCGCATCGCGCATCCCAGCGCATCTGCGCATCAGCGCATCTCGCGCATCCGCGCATCACATCGGAGGCCCCCCAGCGCATCTCGCGCGCATCCTGCGCATCCCGCGCATCCTTCGTGCGCATCCGCGCATCGCGCATCTGTGGCGCATCGTGCGCATCTCGCCATTATGCGCATCGCGCATCTAGGCGCATCGGCGCATCGCGCATCAGCGCATCGGGCGCATCGATAGAAATGGCGCATCAGAGTGCGCATCGCGCATCTGGCGCATCCGGCGCATCAGCGCATCTCACTCCGCGCATCATGCGCATCGCGCATCTTGAGGTAGTGCAGGCGCATCGGCGGCGCATCTGCGCATCGGCGCATCATGCGCATCACACTCGCGCATCTTGCGCATCTCGCGCATCGTGAGCGCATCCCGAGCGCATCGCGCATCTAGCGCATCGCGCATCTCGCGCATCGCGCATCGGCGCATCTCTGGCGCATCGCGCATCGTGACTTGCGCATCGCGCATCGCGCATCGCGCATCCAAGCGCATCGCGCATCCTTGGCGCATCAAATAGGGCGCATCAGCGCATCTCCCGGCGCATCAGCGCATCACTGGCGCATCGAATAAGCGCATCACGACGAAAAACGAGCGCATCGCGCATCATAGCGCATCCTGCGCATCACACCGCGCATCGGCGCATCCTTCTGCGCATCGACGCGCATCGGCGAGCGCATCATACGCGCATCGCGCATCGCGCATCGCGCATCGGGGCGCATCACCAGACGCGTCAGCGCATCCATGCGCATCACCGCGCATCGCGCATCAAGGCGCATCCGGCGCGCGCATCGCGCATCGGCGCATCAGCGCATCGCGCATCGCTCGCGCATCGGGCGCATCTGCGCGCATCGCGCATCGCGCATCGCGCATCGCGCATCTCCCGGTCAATGGGCGCATCGCGCATCCCCCATTAAAAGACCAGATGCGCATCAAGCGCATCGCGCATCGCGCATCGCGCATCTAGCGCATCGGCGCATCAACCAGGCGCATCGCGCATCGACCTGTCTGCGCATCGTAGCGCATCTCCGCGCATCTCCTGGCGCATCCTACGCGCATCTTTTAGCGCATCGCGCATCCGAAGGCGCATCTCGCGCATCAATGCGCATCGGGCGCATCGCGCATCTTAGCGCATCAGTGGCGCATCGCCGCGAAACGCGCATCCTTGCGCATCGCGCATCCGCGCATCGGTACACTTTGCGCATCGGGGGCGCATCTGCGCATCATCAGCGCATCGGAGCGCATCGGAGTGCGCATCTGCGCGCATCTCCGGCGCATCTGCGCATCTGAGGCGCATCGCGCATCACGCGCATCTGTCTCTGCGCATCCTGCGCATCTGCGCATCTTTCAGGAGCGCATCAAGTGGATGGCGCATCTAGCGCATCGGCGCATCTGCGCATCGCTCGCGCATCAACACAGCGCATCGCGCATCACGCGCATCTTTGGGCGCATCGGCGCATCAACGCGCGCATCGGGCGCATCGCGCATCCTACTGTAGTGCGCATCGCGCATCGCCTGCGCATCGGGCGCATCGCGCATCTTGTATGCGCATCGTTTGAGCCGCGCATCCCGCGGTGTGCGCATCGCGCGCATCAAACGCGCATCAGCGCATCTTCCGACGCGCATCCAAGCGCATCCTGCGGCGCATCGCTAAGCGCATCAAGGCGCATCACATATCGAGCGCATCGGGCGCATCAAAGCGCATCGCGCATCGCGCATCGGCCAGCGCATCAACCTCGCGCATCTAGCGCATCGCCGCGCATCACCCCGCGCATCGCGCATCATGCGCATCTGCGCATCGAATAGCGCATCACCAGTAGCGCATCCCGCGCATCTCGCGCATCGTGCGCATCGCATAGCGCATCGCGCATCGGGCGCATCATCTAACGGCGCATCGCGCATCAGGCGCATCATGTCGCGCATCGCGCATCTAGCGCGCATCATTGCGCATCAGGCGCATCGCGCATCCACACTCAGCGCATCACGCGCATCGACATATGCGCATCTTGCGCATCGCGCATCGCGCATCTAGCGCATCTAGCGCATCCGGCGCATCATAGAGGCGCATCCGGCGCATCCGGGTGGGATGGCGCATCTCTGGGCGCATCGTCTGCGCATCGCGCATCTACGCGCATCCGCGCATCTGCCCGCTGCTTGCGCATCCGCGCATCGGCGCATCGCGCATCACCGCGCATCGCGCATCTCAGCGCATCAGCGCATCGGGCGCATCGCGCATCCGTTAGGCGCATCATGCGCATCTTCTGCTGTTGGCGCATCGCGCATCACTTAGCGCATCAGCGCATCCGCGCATCGCGCATCCTGCGCATCGCGCATCGCAACGAGAGCAATAGACCCCTGCGCGCATCCGCGCATCCCCCGTTGCGCATCTTTGCGCATCCCAAAAGCTGCGCATCGCGCATCCACAACAAGCGCGCATCTGCGCATCAGCGCATCGAGCGCATCAGTCGGCGCGACAAGCGCATCGCGCATCATATAGCGCATCGAGGGCGCATCTGCGCATCGCGCATCCCCGCGCATCGCGCATCGCGCATCGCGCATCAGCCGAGCGCGCATCTGCGCATCGCGCATCCGCGCATCCGGCGCATCGCGCATCATACGCGCATCTGCGCATCCCCCCGAGCGGAGGGCGCATCTCCGCGCATCTGCGCATCTCTGGCGCATCACGACCCTCATGGCGCATCCAAGCGCATCGAGCGCATCCGCGCGCATCCAGCGCATCGCGCATCAGCGCATCGTACGCGCATCGGCGCATCATCACGCGCATCGCGCATCTCGCGCATCAGCGCATCCCTAAGCGCATCGGGCGCATCGGGGCGCATCTGCGCATCAGCGCATCGTACGCGCATCAGTGCGCATCCACGCGCATCGATGGGGCGCATCGGCGCATCTGCGCATCAACTCGCGCGCATCAGCGCATCGTGAATGCGCATCTAGTGCGCATCGGGATGCGCATCTGGCTGCGCATCGAACTAGTGCGCATCCGCGCATCGTGCGCATCGGGTTAGTGCGCATCAAACGGCGCATCATTTGCGCATCTGCGCATCTAATGCGCATCAGCAAGTCAGCGCATCGCGCATCGCGCATCTCACCCGGGCGCATCGTGCTGCGCATCGCGCATCGCGCATCCTTGCGCATCATAGGCAGCGCATCCGCGCATCACTGACGCGCATCCACCGTCGCGCATCGCGCATCGCGCATCGCGCATCGCGCATCTGCGCATCAGGCGCATCGAGGGGGCGCATCGCGCATCGGCGCATCTGGCGCATCTGCTACTCAGCGCATCCCGCGCATCAGCGCATCGCGCATCAGGCGCATCTGTCACTTGGGCGCATCTCCGCGCATCACAGGAAGCGCATCAGGCGCATCCGCGCATCTGCGCATCCCGGCGCATCGCGCATCACGCGCATCATTCGCGCATCGCGCATCTCCGGCGCATCGGCGCATCCGAGAAGCGCATCGGGACCGCGCATCCGCGCATCCGCGCATCGCGCATCCAGTCGCGCATCGCGCATCCGCGCGCATCGCGCATCAGCGCATCGGCGCATCGGCGCATCGCGCATCGCGCATCTCTCGCGCATCACGAGCGCATCGCGCATCGTGTTCGCGCATCTTGCGCATCGGCGCATCAATGCGCATCGCGCATCCTTTCAGCGCATCGCGCATCGCGCATCGCGCATCGGGCATCGATCGTGCGCATCGCGCATCGCGGCGCATCGCGCATCCGCGCATCATGCGCATCCTCGGGCGCATCGCGCATCGCAGGCAGCGCATCTGCTAGCATTGATGCGCATCGCGCATCCGCGCATCAAATAGCGCATCATACGCGCATCGCGCATCGCGCATCTGCGCATCAGCGCATCTGCGCATCGCGCATCCGCGCATCACAGCGCATCCATCCCCGCGCATCGCGCATCGCGCATCGCGCATCGCGGAGCGCATCGCGCATCGTGCGCATCGGCGCGCATCGCGCATCAGGCGCATCATTACGCGCATCGCGCATCGCGCATCTCTATGGCGCGCATCCTTAGGCGCATCGTCTAGCGCATCACTCCCTAGACTGCGCATCTAGCGCATCCGCGCATCTTACTTACTCGCGCATCGCGCATCAGCGCATCAAGTTTCGCGCATCTCCGCGCATCCGCGCATCGTGCGCATCAGCGCATCGGTAGGCGCATCACCAGCGCATCTGTGCGCATCGCGCATCGCGCATCCGTTGCGCATCTCACCTGTACAATTGCGCATCACGGCGCATCGCGCATCCTCGGCGCGCATCGGCGCATCGCGCGCATCCACACTGCGCATCGCAACGCGCATCGCGCATCGGCGCATCGGCGCATCGCGCATCTAGCGCATCTAGCAAATTGCGCGCATCGCGCATCTGCGCATCCATGCTAGCGCATCTGCGCATCGCGCATCTTTAGGCGCATCAACTCGCGCGCATCGCGCATCGCTTTACGCTCTGCGCATCCTGTGCGCATCAGCGCATCGGCGCATCGCGCATCGGGTAGATGCGCGCATCGCGCATCCATCGCGCATCAACGCGCATCCGCTGCGCATCCAGACAAGCGCATCGCGCATCGCCGCGCATCTCCGCGCATCGGCGCATCACCCAGCGCATCTCGCGCATCATGTCTGCGCATCTCTAGGCGCATCTGCGCATCAGCGCATCCCAGCGCATCGCGCATCGCGCATCCGCGCATCGCGCATCGGAAGCGCATCTAGCGCATCCGCGCATCCCAGCGCATCGCGCATCTCGCGCATCTGGCGCATCGGCGCGCATCCCATCCCGCGCATCAGTGTTGCGCATCGCGCATCAAGCGCATCGCGCATCGCGCATCTGAGCGCGCATC')
#indicesCombined = patternmatching('GCGCATCGC',s)
#indicesCombined = indices + indicesComp
#indicesCombined.sort()
#print indicesCombined
for i in range(0,len(indicesCombined)):
a = a+' ' + str(indicesCombined[i])
print(a)