diff --git a/CHANGELOG.md b/CHANGELOG.md index d28d7624..3a6667ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,11 @@ release (0.9.0), unrecognized arguments/keywords for these methods of creating a instead of being passed as ClickHouse server settings. This is in conjunction with some refactoring in Client construction. The supported method of passing ClickHouse server settings is to prefix such arguments/query parameters with`ch_`. +## 0.8.14, 2025-01-13 +### Bug Fix +- Fix an edge case where a Pandas dataframe that contains _only_ Int64 (or smaller) values would cause an exception when +inserting into a ClickHouse "big int" table of U/Int128/256. Closes https://github.com/ClickHouse/clickhouse-connect/issues/452 + ## 0.8.13, 2025-01-07 ### Bug Fix - Fix missing default for new access_token parameter. Thanks to [Lukas Thaler](https://github.com/lukasthalerINNIO) for the PR. diff --git a/clickhouse_connect/__version__.py b/clickhouse_connect/__version__.py index 953ed661..9446a3c6 100644 --- a/clickhouse_connect/__version__.py +++ b/clickhouse_connect/__version__.py @@ -1 +1 @@ -version = '0.8.13' +version = '0.8.14' diff --git a/clickhouse_connect/datatypes/numeric.py b/clickhouse_connect/datatypes/numeric.py index 5f0becf1..7e86b623 100644 --- a/clickhouse_connect/datatypes/numeric.py +++ b/clickhouse_connect/datatypes/numeric.py @@ -98,6 +98,7 @@ def _finalize_column(self, column: Sequence, ctx: QueryContext) -> Sequence: class BigInt(ClickHouseType, registered=False): _signed = True valid_formats = 'string', 'native' + python_type = int def _read_column_binary(self, source: ByteSource, num_rows: int, ctx: QueryContext, _read_state: Any): signed = self._signed diff --git a/clickhouse_connect/driver/insert.py b/clickhouse_connect/driver/insert.py index a54ae37c..af5d9ed7 100644 --- a/clickhouse_connect/driver/insert.py +++ b/clickhouse_connect/driver/insert.py @@ -148,19 +148,20 @@ def _convert_pandas(self, df): data = [] for df_col_name, col_name, ch_type in zip(df.columns, self.column_names, self.column_types): df_col = df[df_col_name] - d_type = str(df_col.dtype) + d_type_kind = df_col.dtype.kind if ch_type.python_type == int: - if 'float' in d_type: + if d_type_kind == 'f': df_col = df_col.round().astype(ch_type.base_type, copy=False) - else: - df_col = df_col.astype(ch_type.base_type, copy=False) - elif 'datetime' in ch_type.np_type and (pd_time_test(df_col) or 'datetime64[ns' in d_type): + elif d_type_kind in ('i', 'u') and not df_col.hasnans: + data.append(df_col.to_list()) + continue + elif 'datetime' in ch_type.np_type and (pd_time_test(df_col) or 'datetime64[ns' in str(df_col.dtype)): div = ch_type.nano_divisor data.append([None if pd.isnull(x) else x.value // div for x in df_col]) self.column_formats[col_name] = 'int' continue if ch_type.nullable: - if d_type == 'object': + if d_type_kind == 'O': # This is ugly, but the multiple replaces seem required as a result of this bug: # https://github.com/pandas-dev/pandas/issues/29024 df_col = df_col.replace({pd.NaT: None}).replace({np.nan: None}) diff --git a/tests/integration_tests/test_pandas.py b/tests/integration_tests/test_pandas.py index ae512170..12ba83b6 100644 --- a/tests/integration_tests/test_pandas.py +++ b/tests/integration_tests/test_pandas.py @@ -131,13 +131,14 @@ def test_pandas_low_card(test_client: Client, table_context: Callable): def test_pandas_large_types(test_client: Client, table_context: Callable): - columns = ['key String', 'value Int256'] + columns = ['key String', 'value Int256', 'u_value UInt256' + ] key2_value = 30000000000000000000000000000000000 if not test_client.min_version('21'): columns = ['key String', 'value Int64'] key2_value = 3000000000000000000 with table_context('test_pandas_big_int', columns): - df = pd.DataFrame([['key1', 2000], ['key2', key2_value]], columns=['key', 'value']) + df = pd.DataFrame([['key1', 2000, 50], ['key2', key2_value, 70], ['key3', -2350, 70]], columns=['key', 'value', 'u_value']) source_df = df.copy() test_client.insert_df('test_pandas_big_int', df) result_df = test_client.query_df('SELECT * FROM test_pandas_big_int')