Convert lists and dicts to str in parquet
rad-pat committed Sep 19, 2024
1 parent 5e12fc9 commit 61ab2df
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions plaidcloud/utilities/query.py
@@ -5,6 +5,7 @@
 import uuid
 import unicodecsv as csv
 
+import pyarrow as pa
 import pandas as pd
 import numpy as np
 import requests

[GitHub Actions / PyLint annotation on plaidcloud/utilities/query.py line 8: E0401: Unable to import 'pyarrow' (import-error)]
@@ -21,7 +22,7 @@
 from plaidcloud.utilities.remote.dimension import Dimensions
 
 __author__ = 'Paul Morel'
-__copyright__ = 'Copyright 2010-2021, Tartan Solutions, Inc'
+__copyright__ = 'Copyright 2010-2024, Tartan Solutions, Inc'
 __credits__ = ['Paul Morel']
 __license__ = 'Apache 2.0'
 __maintainer__ = 'Paul Morel'
@@ -496,8 +497,12 @@ def bulk_insert_dataframe(self, table_object, df, append=False, chunk_size=50000
             load_type='parquet',
         )
         if data_load:
+            schema = pa.Schema.from_pandas(df)
+            for index, col in enumerate(schema):
+                if isinstance(col.type, (pa.ListType, pa.StructType)):
+                    schema = schema.set(index, col.with_type(pa.string()))
             with tempfile.NamedTemporaryFile(mode='wb+') as pq_file:
-                df.to_parquet(pq_file)
+                df.to_parquet(pq_file, schema=schema)
                 # upload the file
                 pq_file.seek(0)
                 self._upload(data_load['load_type'], data_load['upload_path'], pq_file)
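For readers who want to see what the new schema coercion does in isolation, here is a minimal standalone sketch. The DataFrame, its column names, and the commented output are hypothetical illustrations, not part of the repository; only the schema-rewriting loop mirrors the committed code.

import pandas as pd
import pyarrow as pa

# Hypothetical frame with a list column and a dict column; pyarrow normally
# infers list and struct types for these when building a schema.
df = pd.DataFrame({
    'id': [1, 2],
    'tags': [['a', 'b'], ['c']],
    'attrs': [{'x': 1}, {'x': 2}],
})

schema = pa.Schema.from_pandas(df)
print(schema)   # fields roughly: id: int64, tags: list<item: string>, attrs: struct<x: int64>

# Same idea as the commit: replace list/struct fields with plain string fields
# so the declared parquet schema carries no nested types.
for index, col in enumerate(schema):
    if isinstance(col.type, (pa.ListType, pa.StructType)):
        schema = schema.set(index, col.with_type(pa.string()))

print(schema)   # fields roughly: id: int64, tags: string, attrs: string

In the commit itself, the rewritten schema is then handed to df.to_parquet(pq_file, schema=schema) before the file is uploaded.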
