Skip to content

Commit

Permalink
Use u'' strings again for easier cross Python 2/3 code.
Browse files Browse the repository at this point in the history
  • Loading branch information
hannosch committed Jan 4, 2016
1 parent 1b2213c commit b62ff45
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 48 deletions.
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ Changelog
2.2 - unreleased
----------------

- Use ``u''`` string literals again to simplify code that runs on both Python 2 and 3.
[hannosch]

- #5: Fix plural form support under Python 3.x.
[hannosch]

Expand Down
35 changes: 12 additions & 23 deletions pythongettext/msgfmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,6 @@

PY3 = sys.version_info[0] == 3
if PY3:
def b(s):
return s.encode("latin-1")

def u(s, enc=None):
return s

def header_charset(s):
p = HeaderParser()
return p.parsestr(s).get_content_charset()
Expand All @@ -54,12 +48,6 @@ def header_charset(s):
BytesIO = io.BytesIO
FILE_TYPE = io.IOBase
else:
def b(s):
return s

def u(s, enc="unicode_escape"):
return unicode(s, enc)

def header_charset(s):
p = HeaderParser()
return p.parsestr(s.encode('utf-8', 'ignore')).get_content_charset()
Expand Down Expand Up @@ -117,7 +105,7 @@ def add(self, context, id, string, fuzzy):
if string and not fuzzy:
# The context is put before the id and separated by a EOT char.
if context:
id = context + u('\x04') + id
id = context + u'\x04' + id
if not id:
# See whether there is an encoding declaration
charset = header_charset(string)
Expand All @@ -135,17 +123,17 @@ def generate(self):
# the keys are sorted in the .mo file
keys = sorted(self.messages.keys())
offsets = []
ids = strs = b('')
ids = strs = b''
for id in keys:
msg = self.messages[id].encode(self.encoding)
id = id.encode(self.encoding)
# For each string, we need size and file offset. Each string is
# NUL terminated; the NUL does not count into the size.
offsets.append((len(ids), len(id), len(strs),
len(msg)))
ids += id + b('\0')
strs += msg + b('\0')
output = b('')
ids += id + b'\0'
strs += msg + b'\0'
output = b''
# The header is 7 32-bit unsigned integers. We don't use hash tables,
# so the keys start right after the index tables.
keystart = 7 * 4 + 16 * len(keys)
Expand Down Expand Up @@ -191,7 +179,7 @@ def read(self, header_only=False):

section = None
fuzzy = 0
msgid = msgstr = msgctxt = u('')
msgid = msgstr = msgctxt = u''

# Parse the catalog
lno = 0
Expand All @@ -218,15 +206,15 @@ def read(self, header_only=False):
if l.startswith('msgctxt'):
section = CTXT
l = l[7:]
msgctxt = u('')
msgctxt = u''
# Now we are in a msgid section, output previous section
elif (l.startswith('msgid') and
not l.startswith('msgid_plural')):
if section == STR:
self.add(msgid, msgstr, fuzzy)
section = ID
l = l[5:]
msgid = msgstr = u('')
msgid = msgstr = u''
is_plural = False
# This is a message with plural forms
elif l.startswith('msgid_plural'):
Expand All @@ -235,7 +223,7 @@ def read(self, header_only=False):
'msgid on line %d of po file %s' %
(lno, repr(self.name)))
l = l[12:]
msgid += u('\0') # separator of singular and plural
msgid += u'\0' # separator of singular and plural
is_plural = True
# Now we are in a msgstr section
elif l.startswith('msgstr'):
Expand All @@ -248,7 +236,7 @@ def read(self, header_only=False):
l = l.split(']', 1)[1]
if msgstr:
# Separator of the various plural forms
msgstr += u('\0')
msgstr += u'\0'
else:
if is_plural:
raise PoSyntaxError('indexed msgstr required for '
Expand All @@ -265,7 +253,8 @@ def read(self, header_only=False):
except Exception as msg:
raise PoSyntaxError('%s (line %d of po file %s): \n%s' %
(msg, lno, repr(self.name), l))
l = u(l, self.encoding)
if isinstance(l, bytes):
l = l.decode(self.encoding)
if section == CTXT:
msgctxt += l
elif section == ID:
Expand Down
35 changes: 10 additions & 25 deletions pythongettext/tests/test_compile.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
import os
import sys

from pythongettext.msgfmt import Msgfmt
from pythongettext.msgfmt import PoSyntaxError
Expand All @@ -11,20 +10,6 @@

FOLDER = os.path.dirname(__file__)

PY3 = sys.version_info[0] == 3
if PY3:
def b(s):
return s.encode("latin-1")

def u(s, enc=None):
return s
else:
def b(s):
return s

def u(s, enc="unicode_escape"):
return unicode(s, enc)


class TestWriter(unittest.TestCase):

Expand All @@ -35,7 +20,7 @@ def compare_po_mo(self, poname, moname):
po_file = open(os.path.join(FOLDER, poname), 'rb')
po = Msgfmt(po_file).get()
mo_file = open(os.path.join(FOLDER, moname), 'rb')
mo = b('').join(mo_file.readlines())
mo = b''.join(mo_file.readlines())
finally:
if po_file is not None:
po_file.close()
Expand All @@ -62,8 +47,8 @@ def test_test4(self):
po.read(header_only=True)
po_file.close()
self.assertTrue(
po.messages[u('')].startswith('Project-Id-Version: foo'))
self.assertEqual(po.encoding, u('iso-8859-1'))
po.messages[u''].startswith('Project-Id-Version: foo'))
self.assertEqual(po.encoding, u'iso-8859-1')

def test_test5(self):
po_file = open(os.path.join(FOLDER, 'test5.po'), 'rb')
Expand All @@ -73,17 +58,17 @@ def test_test5(self):
po.read()
finally:
po_file.close()
self.assertEqual(po.encoding, u('utf-8'))
self.assertEqual(po.encoding, u'utf-8')

def test_test5_unicode_name(self):
po_file = open(os.path.join(FOLDER, 'test5.po'), 'rb')
po = Msgfmt(po_file, name=u('dømain', 'utf-8'))
po = Msgfmt(po_file, name=u'dømain')
try:
with self.assertRaises(PoSyntaxError):
po.read()
finally:
po_file.close()
self.assertEqual(po.encoding, u('utf-8'))
self.assertEqual(po.encoding, u'utf-8')

def test_test6(self):
self.compare_po_mo('test6.po', 'test6.mo')
Expand All @@ -93,9 +78,9 @@ def test_test6_unicode_header(self):
po = Msgfmt(po_file)
po.read(header_only=True)
po_file.close()
self.assertTrue(po.messages[u('')].startswith(
u('Project-Id-Version: Tøst 1.0', 'utf-8')))
self.assertEqual(po.encoding, u('utf-8'))
self.assertTrue(po.messages[u''].startswith(
u'Project-Id-Version: Tøst 1.0'))
self.assertEqual(po.encoding, u'utf-8')

def test_escape(self):
po_file = open(os.path.join(FOLDER, 'test_escape.po'), 'rb')
Expand All @@ -104,7 +89,7 @@ def test_escape(self):
with self.assertRaises(PoSyntaxError) as e:
po.read()
self.assertTrue('line 19' in e.exception.msg)
self.assertEqual(po.encoding, u('utf-8'))
self.assertEqual(po.encoding, u'utf-8')
finally:
po_file.close()

Expand Down

0 comments on commit b62ff45

Please sign in to comment.