jnothman · wlandecker · Nov 30, 2018 · Nov 30, 2018 · jnothman · Dec 17, 2018
diff --git a/searchgrid.py b/searchgrid.py
@@ -2,6 +2,7 @@
 from collections import defaultdict as _defaultdict
 import itertools as _itertools
 
+from sklearn.compose import ColumnTransformer as _ColumnTransformer
 from sklearn.model_selection import GridSearchCV as _GridSearchCV
 from sklearn.pipeline import Pipeline as _Pipeline
 from sklearn.pipeline import FeatureUnion as _FeatureUnion
@@ -147,7 +148,7 @@ def _name_steps(steps, default='alt'):
         if len(estimators) > 1:
             while None in estimators:
                 estimators.remove(None)
-        step_names = {type(estimator).__name__.lower()
+        step_names = {_name_of_estimator(estimator)
                       for estimator in estimators}
         if len(step_names) > 1:
             names.append(default)
@@ -173,6 +174,19 @@ def _name_steps(steps, default='alt'):
     return named_steps, grid
 
 
+def _name_of_estimator(estimator):
+    if isinstance(estimator, tuple):
+        # Tuples come from ColumnTransformer. At the moment, sklearn accepts
+        # both (estimator, list_of_columns) and (list_of_columns, estimator)
+        tuple_types = {type(tuple_entry) for tuple_entry in estimator}
+        tuple_types.discard(list)
+        estimator_type = tuple_types.pop()
+    else:
+        estimator_type = type(estimator)
+
+    return estimator_type.__name__.lower()
+
+
 def make_pipeline(*steps, **kwargs):
     """Construct a Pipeline with alternative estimators to search over
 
@@ -257,3 +271,33 @@ def make_union(*transformers, **kwargs):
     """
     steps, grid = _name_steps(transformers)
     return set_grid(_FeatureUnion(steps, **kwargs), **grid)
+
+
+def make_column_transformer(*transformers, **kwargs):
+    """Construct a ColumnTransformer with alternative estimators to search over
+
+    Parameters
+    ----------
+    transformers
+        Each step is specified as one of:
+
+        * an (estimator, [column_names]) or ([column_names], estimator) tuple
+        * None (meaning no features)
+        * a list of the above, indicating that a grid search should alternate
+          over the estimators (or None) in the list
+    kwargs
+        Keyword arguments to the constructor of
+        :class:`sklearn.compose.ColumnTransformer`.
+
+    Notes
+    -----
+    Each step is named according to the set of estimator types in its list:
+
+    * if a step has only one type of estimator (disregarding None), it takes
+      that estimator's class name (lowercased)
+    * if a step has estimators of mixed type, the step is named 'alt'
+    * if there are multiple steps of the same name using the above rules,
+      a suffix '-1', '-2', etc. is added.
+    """
+    steps, grid = _name_steps(transformers)
+    return set_grid(_ColumnTransformer(steps, **kwargs), **grid)
diff --git a/test_searchgrid.py b/test_searchgrid.py
@@ -6,7 +6,7 @@
 from sklearn.feature_selection import SelectKBest, SelectPercentile
 from sklearn.datasets import load_iris
 from searchgrid import set_grid, build_param_grid, make_grid_search
-from searchgrid import make_pipeline, make_union
+from searchgrid import make_column_transformer, make_pipeline, make_union
 
 
 @pytest.mark.parametrize(('estimator', 'param_grid'), [
@@ -111,3 +111,36 @@ def test_make_pipeline():
     assert type(pipe) is Pipeline
     assert type(union) is FeatureUnion
     assert pipe.memory == '/path/to/nowhere'
+
+
+def test_make_column_transformer():
+    t1 = (SelectKBest(), ['column1'])
+    t2 = (SelectKBest(), ['column2a', 'column2b'])
+    t3 = (SelectKBest(), ['column3'])
+    t4 = (SelectKBest(), ['column4'])
+    t5 = (SelectPercentile(), ['column5'])
+    t6 = (SelectKBest(), ['column6a', 'column6b'])
+    t7 = (SelectKBest(), ['column7'])
+    t8 = (SelectKBest(), ['column8'])
+    t9 = (SelectPercentile(), ['column9'])
+
+    in_steps = [[t1, None],
+                [t2, t3],
+                [t4, t5],  # mixed
+                t6,
+                [None, t7],
+                [t8, None, t9],  # mixed
+                None]
+    column_transformer = make_column_transformer(*in_steps)
+    names, steps = zip(*column_transformer.transformers)
+
+    assert names == ('selectkbest-1', 'selectkbest-2', 'alt-1',
+                     'selectkbest-3', 'selectkbest-4', 'alt-2', 'nonetype')
+    assert steps == (t1, t2, t4, t6, None, t8, None)
+
+    assert len(column_transformer._param_grid) == 5
+    assert column_transformer._param_grid[names[0]] == [t1, None]
+    assert column_transformer._param_grid[names[1]] == [t2, t3]
+    assert column_transformer._param_grid[names[2]] == [t4, t5]
+    assert column_transformer._param_grid[names[4]] == [None, t7]
+    assert column_transformer._param_grid[names[5]] == [t8, None, t9]