diff --git a/dask_expr/_concat.py b/dask_expr/_concat.py
index 1301434bb..d0ae12f94 100644
--- a/dask_expr/_concat.py
+++ b/dask_expr/_concat.py
@@ -46,9 +46,17 @@ def _frames(self):
 
     @functools.cached_property
     def _meta(self):
+        # Ignore DataFrames without columns to avoid dtype upcasting.  If every
+        # input is column-less, keep them all so the list handed to
+        # ``methods.concat`` is never empty.
+        frames = [
+            df
+            for df in self._frames
+            if df.ndim < 2 or len(df._meta.columns) > 0
+        ] or self._frames
         meta = make_meta(
             methods.concat(
-                [meta_nonempty(df._meta) for df in self._frames],
+                [meta_nonempty(df._meta) for df in frames],
                 join=self.join,
                 filter_warning=False,
                 axis=self.axis,
diff --git a/dask_expr/tests/test_concat.py b/dask_expr/tests/test_concat.py
index c1cd54f79..505c5363d 100644
--- a/dask_expr/tests/test_concat.py
+++ b/dask_expr/tests/test_concat.py
@@ -140,6 +140,18 @@ def test_concat_index(df, pdf):
     assert query._name == expected._name
 
 
+def test_concat_dataframe_empty():
+    # Concatenating with a column-less DataFrame should match pandas.
+    df = lib.DataFrame({"a": [100, 200, 300]}, dtype="int64")
+    empty_df = lib.DataFrame([], dtype="int64")
+    df_concat = lib.concat([df, empty_df])
+
+    ddf = from_pandas(df, npartitions=1)
+    empty_ddf = from_pandas(empty_df, npartitions=1)
+    ddf_concat = concat([ddf, empty_ddf])
+    assert_eq(df_concat, ddf_concat)
+
+
 def test_concat_after_merge():
     pdf1 = lib.DataFrame(
         {"x": range(10), "y": [1, 2, 3, 4, 5] * 2, "z": ["cat", "dog"] * 5}