Skip to content

Commit

Permalink
Merge pull request #4876 from PrimozGodec/csv-import-backward
Browse files Browse the repository at this point in the history
CSV file Import: Backward compatibility
  • Loading branch information
janezd authored Jul 31, 2020
2 parents 21b6b45 + fe064b8 commit b6a5c2d
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 9 deletions.
27 changes: 21 additions & 6 deletions Orange/widgets/data/owcsvimport.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import typing
from typing import (
List, Tuple, Dict, Optional, Any, Callable, Iterable, Hashable,
Union, AnyStr, BinaryIO
Union, AnyStr, BinaryIO, Set
)

from PyQt5.QtCore import (
Expand Down Expand Up @@ -486,6 +486,13 @@ class Error(widget.OWWidget.Error):
"directory": "",
"filter": ""
}) # type: Dict[str, str]

# Column type guessing was added to this widget, which breaks compatibility
# with older saved workflows, where column types were not guessed. When
# compatibility_mode=True, the widget keeps the older behaviour (no guessing).
settings_version = 2
compatibility_mode = settings.Setting(False, schema_only=True)

MaxHistorySize = 50

want_main_area = False
Expand Down Expand Up @@ -844,7 +851,7 @@ def progress_(i, j):

task.future = self.__executor.submit(
clear_stack_on_cancel(load_csv),
path, opts, progress_,
path, opts, progress_, self.compatibility_mode
)
task.watcher.setFuture(task.future)
w = task.watcher
Expand Down Expand Up @@ -1043,6 +1050,11 @@ def _restoreState(self):
if idx != -1:
self.recent_combo.setCurrentIndex(idx)

@classmethod
def migrate_settings(cls, settings, version):
    """
    Flag settings stored by an older widget version for compatibility mode.

    `settings` is modified in place: when `version` is missing (falsy) or
    lower than 2, its "compatibility_mode" entry is set to True. Settings
    of version 2 or higher are left untouched.
    """
    is_legacy = not version or version < 2
    if is_legacy:
        settings["compatibility_mode"] = True


@singledispatch
def sniff_csv(file, samplesize=2 ** 20):
Expand Down Expand Up @@ -1165,8 +1177,8 @@ def _mime_type_for_path(path):
}


def load_csv(path, opts, progress_callback=None):
# type: (Union[AnyStr, BinaryIO], Options, ...) -> pd.DataFrame
def load_csv(path, opts, progress_callback=None, compatibility_mode=False):
# type: (Union[AnyStr, BinaryIO], Options, ..., bool) -> pd.DataFrame
def dtype(coltype):
# type: (ColumnType) -> Optional[str]
if coltype == ColumnType.Numeric:
Expand Down Expand Up @@ -1268,7 +1280,10 @@ def expand(ranges):
na_values=na_values, keep_default_na=False,
**numbers_format_kwds
)
df = guess_types(df, dtypes, columns_ignored)

# for older workflows (compatibility mode) skip type guessing
if not compatibility_mode:
df = guess_types(df, dtypes, columns_ignored)

if columns_ignored:
# TODO: use 'usecols' parameter in `read_csv` call to
Expand All @@ -1282,7 +1297,7 @@ def expand(ranges):


def guess_types(
df: pd.DataFrame, dtypes: Dict[int, str], columns_ignored: List[int]
df: pd.DataFrame, dtypes: Dict[int, str], columns_ignored: Set[int]
) -> pd.DataFrame:
"""
Guess data type for variables according to values.
Expand Down
36 changes: 33 additions & 3 deletions Orange/widgets/data/tests/test_owcsvimport.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,8 @@ def test_type_guessing(self):
stored_settings={
"_session_items": [
(path, self.data_csv_types_options.as_dict())
]
],
"__version__": 2 # guessing works for versions >= 2
}
)
widget.commit()
Expand All @@ -161,9 +162,37 @@ def test_type_guessing(self):
self.assertIsInstance(domain["numeric2"], ContinuousVariable)
self.assertIsInstance(domain["string"], StringVariable)

def test_backward_compatibility(self):
    """
    Check that the widget keeps the old behaviour (no type guessing)
    for workflows stored with settings version < 2.
    """
    path = os.path.join(os.path.dirname(__file__), "data-csv-types.tab")
    stored_settings = {
        "_session_items": [
            (path, self.data_csv_types_options.as_dict())
        ],
        "__version__": 1  # stored before type guessing (version 2)
    }
    widget = self.create_widget(
        owcsvimport.OWCSVFileImport, stored_settings=stored_settings
    )
    widget.commit()
    self.wait_until_finished(widget)
    domain = self.get_output("Data", widget).domain

    # Variable types expected for each column of the test file when
    # loaded from a version-1 workflow.
    expected_types = (
        ("time", StringVariable),
        ("discrete1", ContinuousVariable),
        ("discrete2", StringVariable),
        ("numeric1", ContinuousVariable),
        ("numeric2", ContinuousVariable),
        ("string", StringVariable),
    )
    for column, variable_type in expected_types:
        self.assertIsInstance(domain[column], variable_type)


class TestImportDialog(GuiTest):
def test_dialog(self):
@staticmethod
def test_dialog():
dirname = os.path.dirname(__file__)
path = os.path.join(dirname, "grep_file.txt")
d = owcsvimport.CSVImportDialog()
Expand Down Expand Up @@ -242,7 +271,8 @@ def test_load_csv(self):
list(df.iloc[:, 1]), ["one", "three"]
)

def test_convert(self):
@staticmethod
def test_convert():
contents = (
b'I, J, K\n'
b' , A, \n'
Expand Down

0 comments on commit b6a5c2d

Please sign in to comment.