-
Notifications
You must be signed in to change notification settings - Fork 1
/
trHyph.py
58 lines (48 loc) · 2.08 KB
/
trHyph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ****************************************************************************
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# ****************************************************************************
"""
trHyph.py is a Python script that generates a TeX-compliant Turkish hyphenation pattern dictionary.
The script and the generated .dic file are both published under the GNU General Public License.
Yakup Cetinkaya, December 2010
"""
import sys
# Codec used to encode the dictionary file; its name is also written verbatim
# as the first line of the file.
coding = "iso8859_9"

# The Turkish alphabet, split into the two letter classes the patterns are
# built from.
consonants = [
    u"b", u"c", u"ç", u"d", u"f", u"g", u"ğ", u"h", u"j", u"k", u"l",
    u"m", u"n", u"p", u"r", u"s", u"ş", u"t", u"v", u"y", u"z",
]
vowels = [u"a", u"e", u"ı", u"i", u"o", u"ö", u"u", u"ü"]

# Header entry. It carries its own newline, so together with the newline the
# writer appends to every entry, the header is followed by one empty line
# (this matches the output of the original script).
tokens = [u"%s\n" % coding]

# NOTE: in TeX (Liang) hyphenation patterns the digit between two letters is
# a priority: odd values permit a break at that position, even values forbid
# it, and the highest value wins when patterns overlap.

# Vowel + vowel: level 3 between two identical vowels, level 1 otherwise.
for first in vowels:
    for second in vowels:
        level = 3 if first == second else 1
        tokens.append(first + str(level) + second)

# Consonant + vowel: level 2 between them, level 1 after the vowel.
for consonant in consonants:
    for vowel in vowels:
        tokens.append(consonant + u"2" + vowel + u"1")

# Vowel + consonant + consonant: level 2 after the vowel, level 1 between
# the two consonants.
for vowel in vowels:
    for first in consonants:
        for second in consonants:
            tokens.append(vowel + u"2" + first + u"1" + second)

# Three consonants in a row: level 2 after the first, level 3 after the
# second.
for first in consonants:
    for second in consonants:
        for third in consonants:
            tokens.append(first + u"2" + second + u"3" + third)

# Encode once at the I/O boundary and write raw bytes. Binary mode keeps the
# output byte-for-byte identical on every platform and works on both
# Python 2 and Python 3, where bytes and text can no longer be concatenated.
payload = u"".join(token + u"\n" for token in tokens).encode(coding)
with open("hyph_tr.dic", "wb") as f:
    f.write(payload)