Skip to content
This repository has been archived by the owner on Apr 18, 2022. It is now read-only.

mandarin_frontend.py文件 _adjust方法这一行是否有bug #32

Open
ZerRui opened this issue Mar 29, 2019 · 0 comments
Open

mandarin_frontend.py文件 _adjust方法这一行是否有bug #32

ZerRui opened this issue Mar 29, 2019 · 0 comments

Comments

@ZerRui
Copy link

ZerRui commented Mar 29, 2019

def _adjust(prosody_txt):
'''Make sure that segment word is smaller than prosody word'''
prosody_words = re.split('#\d', prosody_txt)
rhythms = re.findall('#\d', prosody_txt)
txt = ''.join(prosody_words)
words = []
poses = []
# for txt in prosody_words:
for word, pos in posseg.cut(txt):
words.append(word)
poses.append(pos[0])
index = 0
insert_time = 0
length = len(prosody_words[index])
i = 0
while i < len(words):
done = False
while not done:
if (len(words[i]) > length):
#print(words[i], prosody_words[index])
length += len(prosody_words[index + 1])
rhythms[index+ insert_time] = '' #这一行之前是rhythms[index] = ''
index += 1
elif (len(words[i]) < length):
# print(' less than ', words[i], prosody_words[index])
rhythms.insert(index + insert_time, '#0')
# rhythms.insert(index, '#0')
insert_time += 1
length -= len(words[i])
i += 1
else:
# print('equal :', words[i])
# print(rhythms)
done = True
index += 1
else:
if (index < len(prosody_words)):
length = len(prosody_words[index])
i += 1
if rhythms[-1] != '#4':
rhythms.append('#4')
rhythms = [x for x in rhythms if x != '']
# print(rhythms)
return (words, poses, rhythms)

Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant