From 4030324d2dbe0842e3c5d1394cce93e59be9a496 Mon Sep 17 00:00:00 2001 From: Ales Erjavec Date: Thu, 13 May 2021 11:50:49 +0200 Subject: [PATCH] textimport: Mark encoding errors in the preview --- Orange/widgets/utils/textimport.py | 33 ++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/Orange/widgets/utils/textimport.py b/Orange/widgets/utils/textimport.py index ab8d6fb884a..5fc6aa4c8ac 100644 --- a/Orange/widgets/utils/textimport.py +++ b/Orange/widgets/utils/textimport.py @@ -991,9 +991,11 @@ def __resetPreview(self): base = CachedBytesIOWrapper(self.__sample, self.__buffer) wrapper = io.TextIOWrapper( - base, encoding=self.encoding(), errors="replace" + base, encoding=self.encoding(), + # use surrogate escape to validate/detect encoding errors in + # delegates + errors="surrogateescape" ) - rows = csv.reader( wrapper, dialect=self.dialect() ) @@ -1372,6 +1374,11 @@ def sizeHint(self): return sh.expandedTo(QSize(8 * hsection, 20 * vsection)) +def is_surrogate_escaped(text: str) -> bool: + """Does `text` contain any surrogate escape characters.""" + return any("\udc80" <= c <= "\udcff" for c in text) + + class PreviewItemDelegate(QStyledItemDelegate): def initStyleOption(self, option, index): # type: (QStyleOptionViewItem, QModelIndex) -> None @@ -1389,6 +1396,18 @@ def initStyleOption(self, option, index): if coltype == ColumnType.Numeric or coltype == ColumnType.Time: option.displayAlignment = Qt.AlignRight | Qt.AlignVCenter + if not self.validate(option.text): + option.palette.setBrush( + QPalette.All, QPalette.Text, QBrush(Qt.red, Qt.SolidPattern) + ) + option.palette.setBrush( + QPalette.All, QPalette.HighlightedText, + QBrush(Qt.red, Qt.SolidPattern) + ) + + def validate(self, value: str) -> bool: + return not is_surrogate_escaped(value) + def helpEvent(self, event, view, option, index): # type: (QHelpEvent, QAbstractItemView, QStyleOptionViewItem, QModelIndex) -> bool if event.type() == QEvent.ToolTip: @@ -1469,14 +1488,6 @@ def __init__(self, *args, converter=None, **kwargs): def initStyleOption(self, option, index): super().initStyleOption(option, index) - if not self.validate(option.text): - option.palette.setBrush( - QPalette.All, QPalette.Text, QBrush(Qt.red, Qt.SolidPattern) - ) - option.palette.setBrush( - QPalette.All, QPalette.HighlightedText, - QBrush(Qt.red, Qt.SolidPattern) - ) def validate(self, value): if value in {"NA", "Na", "na", "n/a", "N/A", "?", "", "."}: @@ -1486,7 +1497,7 @@ def validate(self, value): except ValueError: return False else: - return True + return super().validate(value) def number_parser(groupsep, decimalsep):