Skip to content

Commit

Permalink
Fix UnicodeDecodeError in case of invalid UTF-8 in input file
Browse files Browse the repository at this point in the history
  • Loading branch information
flashcode committed Oct 23, 2024
1 parent 36759d5 commit 61b94ae
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 1 deletion.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@

- Use file README.md as package long description

### Fixed

- Fix UnicodeDecodeError in case of invalid UTF-8 in input file

## Version 4.0.0 (2022-01-23)

### Changed
Expand Down
2 changes: 1 addition & 1 deletion msgcheck/po.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ def read(self): # pylint: disable=too-many-locals
"""
self.msgs = []
checker = Checker()
with open(self.filename, "r", encoding="utf-8") as po_file:
with open(self.filename, "r", encoding="utf-8", errors="ignore") as po_file:
for line in po_file:
message = checker.check_line(line.strip())
if message:
Expand Down
40 changes: 40 additions & 0 deletions tests/fr_invalid_utf8.po
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#
# Copyright (C) 2024 Sébastien Helleu <[email protected]>
#
# This file is part of msgcheck.
#
# Msgcheck is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Msgcheck is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with msgcheck. If not, see <https://www.gnu.org/licenses/>.
#

#
# Gettext file with invalid UTF-8 chars.
#

msgid ""
msgstr ""
"Project-Id-Version: msgcheck\n"
"Report-Msgid-Bugs-To: [email protected]\n"
"POT-Creation-Date: 2014-05-03 12:00+0200\n"
"PO-Revision-Date: 2024-09-12 17:02+0200\n"
"Last-Translator: Sébastien Helleu <[email protected]>\n"
"Language-Team: [email protected]\n"
"Language: fr\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=iso-8859-13\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=2; plural=(n > 1);\n"

# Normal string with special chars
msgid "id-õäöü"
msgstr "str-þð"
13 changes: 13 additions & 0 deletions tests/test_msgcheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,3 +434,16 @@ def test_punct_full_stop_ja_zh(language, msgid, msgstr, error_message):
assert error_message in errors[0].message
else:
assert not errors


def test_invalid_utf8():
    """Run the checks on a gettext file holding invalid UTF-8 chars.

    The "fuzzy" check is switched on, then the fixture file is checked;
    the test expects a single per-file entry carrying an empty error list.
    """
    checker = PoCheck()
    checker.set_check("fuzzy", True)
    po_path = local_path("fr_invalid_utf8.po")
    report = checker.check_files([po_path])

    # exactly one file must be present in the report
    assert len(report) == 1

    # no error must be reported for that file
    file_errors = report[0][1]
    assert len(file_errors) == 0

0 comments on commit 61b94ae

Please sign in to comment.