[FIX] File: Raise and handle exception when file is a bad pickle #2232

Merged: 2 commits, Apr 21, 2017
62 changes: 38 additions & 24 deletions Orange/data/io.py
@@ -78,7 +78,8 @@ def detect_encoding(filename):
if encoding in (b'utf-8', b'us-ascii', b'iso-8859-1',
b'utf-7', b'utf-16le', b'utf-16be', b'ebcdic'):
return encoding.decode('us-ascii')
except OSError: pass # windoze
except OSError:
pass # windoze

# file not available or unable to guess the encoding, have chardet do it
detector = UniversalDetector()
@@ -163,7 +164,7 @@ def valuemap_index(val):
offset = len(new_order)
column = values if data.ndim > 1 else data
column += offset
for i, val in enumerate(var.values):
for _, val in enumerate(var.values):
try:
oldval = old_order.index(val)
except ValueError:
@@ -185,11 +186,11 @@ class Flags:
_RE_SPLIT = re.compile(r'(?<!\\)' + DELIMITER).split
_RE_ATTR_UNQUOTED_STR = re.compile(r'^[a-zA-Z_]').match
ALL = OrderedDict((
('class', 'c'),
('ignore', 'i'),
('meta', 'm'),
('weight', 'w'),
('.+?=.*?', ''), # general key=value attributes
('class', 'c'),
('ignore', 'i'),
('meta', 'm'),
('weight', 'w'),
('.+?=.*?', ''), # general key=value attributes
))
_RE_ALL = re.compile(r'^({})$'.format('|'.join(filter(None, flatten(ALL.items())))))

@@ -228,10 +229,13 @@ def split(s):

# Matches discrete specification where all the values are listed, space-separated
_RE_DISCRETE_LIST = re.compile(r'^\s*[^\s]+(\s[^\s]+)+\s*$')
_RE_TYPES = re.compile(r'^\s*({}|{}|)\s*$'.format(_RE_DISCRETE_LIST.pattern,
'|'.join(flatten(getattr(vartype, 'TYPE_HEADERS')
for vartype in Variable.registry.values()))))
_RE_FLAGS = re.compile(r'^\s*( |{}|)*\s*$'.format('|'.join(flatten(filter(None, i) for i in Flags.ALL.items()))))
_RE_TYPES = re.compile(r'^\s*({}|{}|)\s*$'.format(
_RE_DISCRETE_LIST.pattern,
'|'.join(flatten(getattr(vartype, 'TYPE_HEADERS') for vartype in Variable.registry.values()))
))
_RE_FLAGS = re.compile(r'^\s*( |{}|)*\s*$'.format(
'|'.join(flatten(filter(None, i) for i in Flags.ALL.items()))
))


class FileFormatMeta(Registry):
@@ -454,8 +458,10 @@ def parse_headers(data):
"""Return (header rows, rest of data) as discerned from `data`"""

def is_number(item):
try: float(item)
except ValueError: return False
try:
float(item)
except ValueError:
return False
return True
# Second row items are type identifiers
def header_test2(items):
@@ -485,8 +491,10 @@ def header_test3(items):

# Try to parse a single-line header
if not header_rows:
try: lines.append(list(next(data)))
except StopIteration: pass
try:
lines.append(list(next(data)))
except StopIteration:
pass
if lines:
# Header if none of the values in line 1 parses as a number
if not all(is_number(i) for i in lines[0]):
@@ -497,7 +505,7 @@ def header_test3(items):
return header_rows, data

@classmethod
def data_table(self, data, headers=None):
def data_table(cls, data, headers=None):
"""
Return Orange.data.Table given rows of `headers` (iterable of iterable)
and rows of `data` (iterable of iterable; if ``numpy.ndarray``, might
@@ -510,22 +518,24 @@
assuming they precede it.
"""
if not headers:
headers, data = self.parse_headers(data)
headers, data = cls.parse_headers(data)

# Consider various header types (single-row, two-row, three-row, none)
if 3 == len(headers):
if len(headers) == 3:
names, types, flags = map(list, headers)
else:
if 1 == len(headers):
if len(headers) == 1:
HEADER1_FLAG_SEP = '#'
# First row format either:
# 1) delimited column names
# 2) -||- with type and flags prepended, separated by #,
# e.g. d#sex,c#age,cC#IQ
_flags, names = zip(*[i.split(HEADER1_FLAG_SEP, 1) if HEADER1_FLAG_SEP in i else ('', i)
for i in headers[0]])
_flags, names = zip(*[i.split(HEADER1_FLAG_SEP, 1)
if HEADER1_FLAG_SEP in i else ('', i)
for i in headers[0]]
)
names = list(names)
elif 2 == len(headers):
elif len(headers) == 2:
names, _flags = map(list, headers)
else:
# Use heuristics for everything
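
For readers unfamiliar with the single-row header convention handled in the hunk above: each header cell may carry flags and type before a '#' separator (e.g. d#sex). A standalone sketch of that split, outside the diff and with illustrative values only:

    # Illustration only (not part of the PR): how the '#'-separated single-row
    # header is split into flags and names, mirroring the zip/split expression
    # in data_table() above.
    HEADER1_FLAG_SEP = '#'
    header = ["d#sex", "c#age", "cC#IQ", "height"]
    flags, names = zip(*[cell.split(HEADER1_FLAG_SEP, 1)
                         if HEADER1_FLAG_SEP in cell else ('', cell)
                         for cell in header])
    print(flags)   # ('d', 'c', 'cC', '')
    print(names)   # ('sex', 'age', 'IQ', 'height')
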
@@ -635,7 +645,7 @@ def _equal_length(lst):
cols, domain_vars = append_to
cols.append(col)

existing_var, new_var_name, column = None, None, None
existing_var, new_var_name = None, None
if domain_vars is not None:
existing_var = names and names[col]
if not existing_var:
@@ -807,7 +817,11 @@ class PickleReader(FileFormat):

def read(self):
with open(self.filename, 'rb') as f:
return pickle.load(f)
table = pickle.load(f)
if not isinstance(table, Table):
raise TypeError("file does not contain a data table")
else:
return table

@staticmethod
def write_file(filename, data):
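
For reference, a minimal sketch (not part of the diff; the file name is hypothetical) of what the new guard in PickleReader.read() means in practice: unpickling something other than a data table now raises TypeError instead of returning the foreign object.

    import pickle

    from Orange.data.io import PickleReader

    # Hypothetical file holding a pickle that is not an Orange Table.
    with open("not_a_table.pkl", "wb") as f:
        pickle.dump({"just": "a dict"}, f)

    reader = PickleReader("not_a_table.pkl")
    try:
        reader.read()
    except TypeError as err:
        print(err)  # "file does not contain a data table"
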
13 changes: 13 additions & 0 deletions Orange/tests/test_io.py
@@ -63,3 +63,16 @@ def test_locate_wildcard_extension(self):
l = FileFormat.locate("t", search_dirs=[tempdir])
self.assertEqual(l, fn)
shutil.rmtree(tempdir)


class TestReader(unittest.TestCase):

def test_open_bad_pickle(self):
"""
Raise TypeError when PickleReader reads a pickle
file without a table (and one is supposed to be there).
GH-2232
"""
reader = PickleReader("")
with unittest.mock.patch("pickle.load", return_value=None):
self.assertRaises(TypeError, reader.read, "foo")
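
A possible alternative formulation of the same check (not part of this PR; class and method names are hypothetical) that exercises the guard through a real temporary file instead of patching pickle.load:

    import os
    import pickle
    import tempfile
    import unittest

    from Orange.data.io import PickleReader


    class TestReaderBadPickleFile(unittest.TestCase):
        def test_read_non_table_pickle(self):
            # Pickle an object that is not an Orange data table.
            with tempfile.NamedTemporaryFile(suffix=".pkl", delete=False) as f:
                pickle.dump([1, 2, 3], f)
            self.addCleanup(os.unlink, f.name)
            # PickleReader.read() should reject the foreign object.
            self.assertRaises(TypeError, PickleReader(f.name).read)
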