biolab · janezd · Jul 31, 2020 · Jun 26, 2020
diff --git a/Orange/widgets/data/owcsvimport.py b/Orange/widgets/data/owcsvimport.py
@@ -27,7 +27,7 @@
 import typing
 from typing import (
     List, Tuple, Dict, Optional, Any, Callable, Iterable, Hashable,
-    Union, AnyStr, BinaryIO
+    Union, AnyStr, BinaryIO, Set
 )
 
 from PyQt5.QtCore import (
@@ -486,6 +486,13 @@ class Error(widget.OWWidget.Error):
         "directory": "",
         "filter": ""
     })  # type: Dict[str, str]
+
+    # We added column type guessing to this widget, which breaks compatibility
+    # with older saved workflows, where types were guessed differently; when
+    # compatibility_mode=True, the widget keeps the older guessing behaviour
+    settings_version = 2
+    compatibility_mode = settings.Setting(False, schema_only=True)
+
     MaxHistorySize = 50
 
     want_main_area = False
@@ -844,7 +851,7 @@ def progress_(i, j):
 
         task.future = self.__executor.submit(
             clear_stack_on_cancel(load_csv),
-            path, opts, progress_,
+            path, opts, progress_, self.compatibility_mode
         )
         task.watcher.setFuture(task.future)
         w = task.watcher
@@ -1043,6 +1050,11 @@ def _restoreState(self):
             if idx != -1:
                 self.recent_combo.setCurrentIndex(idx)
 
+    @classmethod
+    def migrate_settings(cls, settings, version):
+        if not version or version < 2:
+            settings["compatibility_mode"] = True
+
 
 @singledispatch
 def sniff_csv(file, samplesize=2 ** 20):
@@ -1160,8 +1172,8 @@ def _mime_type_for_path(path):
 }
 
 
-def load_csv(path, opts, progress_callback=None):
-    # type: (Union[AnyStr, BinaryIO], Options, ...) -> pd.DataFrame
+def load_csv(path, opts, progress_callback=None, compatibility_mode=False):
+    # type: (Union[AnyStr, BinaryIO], Options, ..., bool) -> pd.DataFrame
     def dtype(coltype):
         # type: (ColumnType) -> Optional[str]
         if coltype == ColumnType.Numeric:
@@ -1256,7 +1268,10 @@ def expand(ranges):
             float_precision="round_trip",
             **numbers_format_kwds
         )
-        df = guess_types(df, dtypes, columns_ignored)
+
+        # for older workflows, skip type guessing
+        if not compatibility_mode:
+            df = guess_types(df, dtypes, columns_ignored)
 
         if columns_ignored:
             # TODO: use 'usecols' parameter in `read_csv` call to
@@ -1270,7 +1285,7 @@ def expand(ranges):
 
 
 def guess_types(
-        df: pd.DataFrame, dtypes: Dict[int, str], columns_ignored: List[int]
+        df: pd.DataFrame, dtypes: Dict[int, str], columns_ignored: Set[int]
 ) -> pd.DataFrame:
     """
     Guess data type for variables according to values.

diff --git a/Orange/widgets/data/tests/test_owcsvimport.py b/Orange/widgets/data/tests/test_owcsvimport.py
@@ -145,7 +145,8 @@ def test_type_guessing(self):
             stored_settings={
                 "_session_items": [
                     (path, self.data_csv_types_options.as_dict())
-                ]
+                ],
+                "__version__": 2  # guessing works for versions >= 2
             }
         )
         widget.commit()
@@ -160,9 +161,37 @@ def test_type_guessing(self):
         self.assertIsInstance(domain["numeric2"], ContinuousVariable)
         self.assertIsInstance(domain["string"], StringVariable)
 
+    def test_backward_compatibility(self):
+        """
+        Check that the widget keeps old behaviour on workflows with version < 2
+        """
+        dirname = os.path.dirname(__file__)
+        path = os.path.join(dirname, "data-csv-types.tab")
+        widget = self.create_widget(
+            owcsvimport.OWCSVFileImport,
+            stored_settings={
+                "_session_items": [
+                    (path, self.data_csv_types_options.as_dict())
+                ],
+                "__version__": 1  # guessing works for versions >= 2
+            }
+        )
+        widget.commit()
+        self.wait_until_finished(widget)
+        output = self.get_output("Data", widget)
+        domain = output.domain
+
+        self.assertIsInstance(domain["time"], StringVariable)
+        self.assertIsInstance(domain["discrete1"], ContinuousVariable)
+        self.assertIsInstance(domain["discrete2"], StringVariable)
+        self.assertIsInstance(domain["numeric1"], ContinuousVariable)
+        self.assertIsInstance(domain["numeric2"], ContinuousVariable)
+        self.assertIsInstance(domain["string"], StringVariable)
+
 
 class TestImportDialog(GuiTest):
-    def test_dialog(self):
+    @staticmethod
+    def test_dialog():
         dirname = os.path.dirname(__file__)
         path = os.path.join(dirname, "grep_file.txt")
         d = owcsvimport.CSVImportDialog()
@@ -241,7 +270,8 @@ def test_load_csv(self):
             list(df.iloc[:, 1]), ["one", "three"]
         )
 
-    def test_convert(self):
+    @staticmethod
+    def test_convert():
         contents = (
             b'I, J,  K\n'
             b' , A,   \n'