diff --git a/awswrangler/_data_types.py b/awswrangler/_data_types.py index 799a6849b..a71e09015 100644 --- a/awswrangler/_data_types.py +++ b/awswrangler/_data_types.py @@ -49,15 +49,17 @@ def pyarrow2athena( # noqa: PLR0911,PLR0912 if pa.types.is_binary(dtype): return "binary" if pa.types.is_dictionary(dtype): - return pyarrow2athena(dtype=dtype.value_type) + return pyarrow2athena(dtype=dtype.value_type, ignore_null=ignore_null) if pa.types.is_decimal(dtype): return f"decimal({dtype.precision},{dtype.scale})" if pa.types.is_list(dtype): - return f"array<{pyarrow2athena(dtype=dtype.value_type)}>" + return f"array<{pyarrow2athena(dtype=dtype.value_type, ignore_null=ignore_null)}>" if pa.types.is_struct(dtype): - return f"struct<{','.join([f'{f.name}:{pyarrow2athena(dtype=f.type)}' for f in dtype])}>" + return ( + f"struct<{','.join([f'{f.name}:{pyarrow2athena(dtype=f.type, ignore_null=ignore_null)}' for f in dtype])}>" + ) if pa.types.is_map(dtype): - return f"map<{pyarrow2athena(dtype=dtype.key_type)},{pyarrow2athena(dtype=dtype.item_type)}>" + return f"map<{pyarrow2athena(dtype=dtype.key_type, ignore_null=ignore_null)},{pyarrow2athena(dtype=dtype.item_type, ignore_null=ignore_null)}>" if dtype == pa.null(): if ignore_null: return "" diff --git a/tests/unit/test_athena_parquet.py b/tests/unit/test_athena_parquet.py index afdff724d..2d50099e3 100644 --- a/tests/unit/test_athena_parquet.py +++ b/tests/unit/test_athena_parquet.py @@ -376,14 +376,14 @@ def test_store_metadata_partitions_sample_dataset(glue_database, glue_table, pat def test_store_metadata_ignore_null_columns(glue_database, glue_table, path): df = pd.DataFrame({"c0": [0, 1, 2], "c1": [3, 4, 5], "c2_null": [None, None, None], "c3_null": [None, None, None]}) - wr.s3.to_parquet(df=df, path=path, dataset=True, dtype={"c2_null": "int", "c3_null": "int"}) + wr.s3.to_parquet(df=df, path=path, dataset=True, dtype={"c2_null": "int", "c3_null": "array"}) wr.s3.store_parquet_metadata( path=path, database=glue_database, table=glue_table, ignore_null=True, dataset=True, - dtype={"c2_null": "int", "c3_null": "int"}, + dtype={"c2_null": "int", "c3_null": "array"}, )