Use u'' strings again for easier cross Python 2/3 code.

Author: hannosch
Date: Jan 4, 2016
Commit: b62ff45 (1 parent: 1b2213c)

Showing 3 changed files with 25 additions and 48 deletions.
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -4,6 +4,9 @@ Changelog
 2.2 - unreleased
 ----------------
 
+- Use `u''` strings again for easier cross Python 2/3 code.
+  [hannosch]
+
 - #5: Fix plural form support under Python 3.x.
   [hannosch]
 

diff --git a/pythongettext/msgfmt.py b/pythongettext/msgfmt.py
@@ -40,12 +40,6 @@
 
 PY3 = sys.version_info[0] == 3
 if PY3:
-    def b(s):
-        return s.encode("latin-1")
-
-    def u(s, enc=None):
-        return s
-
     def header_charset(s):
         p = HeaderParser()
         return p.parsestr(s).get_content_charset()
@@ -54,12 +48,6 @@ def header_charset(s):
     BytesIO = io.BytesIO
     FILE_TYPE = io.IOBase
 else:
-    def b(s):
-        return s
-
-    def u(s, enc="unicode_escape"):
-        return unicode(s, enc)
-
     def header_charset(s):
         p = HeaderParser()
         return p.parsestr(s.encode('utf-8', 'ignore')).get_content_charset()
@@ -117,7 +105,7 @@ def add(self, context, id, string, fuzzy):
         if string and not fuzzy:
             # The context is put before the id and separated by a EOT char.
             if context:
-                id = context + u('\x04') + id
+                id = context + u'\x04' + id
             if not id:
                 # See whether there is an encoding declaration
                 charset = header_charset(string)
@@ -135,17 +123,17 @@ def generate(self):
         # the keys are sorted in the .mo file
         keys = sorted(self.messages.keys())
         offsets = []
-        ids = strs = b('')
+        ids = strs = b''
         for id in keys:
             msg = self.messages[id].encode(self.encoding)
             id = id.encode(self.encoding)
             # For each string, we need size and file offset. Each string is
             # NUL terminated; the NUL does not count into the size.
             offsets.append((len(ids), len(id), len(strs),
                             len(msg)))
-            ids += id + b('\0')
-            strs += msg + b('\0')
-        output = b('')
+            ids += id + b'\0'
+            strs += msg + b'\0'
+        output = b''
         # The header is 7 32-bit unsigned integers. We don't use hash tables,
         # so the keys start right after the index tables.
         keystart = 7 * 4 + 16 * len(keys)
@@ -191,7 +179,7 @@ def read(self, header_only=False):
 
         section = None
         fuzzy = 0
-        msgid = msgstr = msgctxt = u('')
+        msgid = msgstr = msgctxt = u''
 
         # Parse the catalog
         lno = 0
@@ -218,15 +206,15 @@ def read(self, header_only=False):
             if l.startswith('msgctxt'):
                 section = CTXT
                 l = l[7:]
-                msgctxt = u('')
+                msgctxt = u''
             # Now we are in a msgid section, output previous section
             elif (l.startswith('msgid') and
                   not l.startswith('msgid_plural')):
                 if section == STR:
                     self.add(msgid, msgstr, fuzzy)
                 section = ID
                 l = l[5:]
-                msgid = msgstr = u('')
+                msgid = msgstr = u''
                 is_plural = False
             # This is a message with plural forms
             elif l.startswith('msgid_plural'):
@@ -235,7 +223,7 @@ def read(self, header_only=False):
                         'msgid on line %d of po file %s' %
                         (lno, repr(self.name)))
                 l = l[12:]
-                msgid += u('\0')  # separator of singular and plural
+                msgid += u'\0'  # separator of singular and plural
                 is_plural = True
             # Now we are in a msgstr section
             elif l.startswith('msgstr'):
@@ -248,7 +236,7 @@ def read(self, header_only=False):
                     l = l.split(']', 1)[1]
                     if msgstr:
                         # Separator of the various plural forms
-                        msgstr += u('\0')
+                        msgstr += u'\0'
                 else:
                     if is_plural:
                         raise PoSyntaxError('indexed msgstr required for '
@@ -265,7 +253,8 @@ def read(self, header_only=False):
             except Exception as msg:
                 raise PoSyntaxError('%s (line %d of po file %s): \n%s' %
                     (msg, lno, repr(self.name), l))
-            l = u(l, self.encoding)
+            if isinstance(l, bytes):
+                l = l.decode(self.encoding)
             if section == CTXT:
                 msgctxt += l
             elif section == ID:

diff --git a/pythongettext/tests/test_compile.py b/pythongettext/tests/test_compile.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
 import os
-import sys
 
 from pythongettext.msgfmt import Msgfmt
 from pythongettext.msgfmt import PoSyntaxError
@@ -11,20 +10,6 @@
 
 FOLDER = os.path.dirname(__file__)
 
-PY3 = sys.version_info[0] == 3
-if PY3:
-    def b(s):
-        return s.encode("latin-1")
-
-    def u(s, enc=None):
-        return s
-else:
-    def b(s):
-        return s
-
-    def u(s, enc="unicode_escape"):
-        return unicode(s, enc)
-
 
 class TestWriter(unittest.TestCase):
 
@@ -35,7 +20,7 @@ def compare_po_mo(self, poname, moname):
             po_file = open(os.path.join(FOLDER, poname), 'rb')
             po = Msgfmt(po_file).get()
             mo_file = open(os.path.join(FOLDER, moname), 'rb')
-            mo = b('').join(mo_file.readlines())
+            mo = b''.join(mo_file.readlines())
         finally:
             if po_file is not None:
                 po_file.close()
@@ -62,8 +47,8 @@ def test_test4(self):
         po.read(header_only=True)
         po_file.close()
         self.assertTrue(
-            po.messages[u('')].startswith('Project-Id-Version: foo'))
-        self.assertEqual(po.encoding, u('iso-8859-1'))
+            po.messages[u''].startswith('Project-Id-Version: foo'))
+        self.assertEqual(po.encoding, u'iso-8859-1')
 
     def test_test5(self):
         po_file = open(os.path.join(FOLDER, 'test5.po'), 'rb')
@@ -73,17 +58,17 @@ def test_test5(self):
                 po.read()
         finally:
             po_file.close()
-        self.assertEqual(po.encoding, u('utf-8'))
+        self.assertEqual(po.encoding, u'utf-8')
 
     def test_test5_unicode_name(self):
         po_file = open(os.path.join(FOLDER, 'test5.po'), 'rb')
-        po = Msgfmt(po_file, name=u('dømain', 'utf-8'))
+        po = Msgfmt(po_file, name=u'dømain')
         try:
             with self.assertRaises(PoSyntaxError):
                 po.read()
         finally:
             po_file.close()
-        self.assertEqual(po.encoding, u('utf-8'))
+        self.assertEqual(po.encoding, u'utf-8')
 
     def test_test6(self):
         self.compare_po_mo('test6.po', 'test6.mo')
@@ -93,9 +78,9 @@ def test_test6_unicode_header(self):
         po = Msgfmt(po_file)
         po.read(header_only=True)
         po_file.close()
-        self.assertTrue(po.messages[u('')].startswith(
-            u('Project-Id-Version: Tøst 1.0', 'utf-8')))
-        self.assertEqual(po.encoding, u('utf-8'))
+        self.assertTrue(po.messages[u''].startswith(
+            u'Project-Id-Version: Tøst 1.0'))
+        self.assertEqual(po.encoding, u'utf-8')
 
     def test_escape(self):
         po_file = open(os.path.join(FOLDER, 'test_escape.po'), 'rb')
@@ -104,7 +89,7 @@ def test_escape(self):
             with self.assertRaises(PoSyntaxError) as e:
                 po.read()
             self.assertTrue('line 19' in e.exception.msg)
-            self.assertEqual(po.encoding, u('utf-8'))
+            self.assertEqual(po.encoding, u'utf-8')
         finally:
             po_file.close()