Bumping version to 0.0.3
igorborgest committed Sep 20, 2019
1 parent 6af5c7c commit 54a0d2c
Showing 5 changed files with 35 additions and 17 deletions.
awswrangler/__version__.py (2 changes: 1 addition & 1 deletion)
@@ -1,4 +1,4 @@
 __title__ = "awswrangler"
 __description__ = "Utility belt to handle data on AWS."
-__version__ = "0.0.2"
+__version__ = "0.0.3"
 __license__ = "Apache License 2.0"
awswrangler/glue.py (26 changes: 18 additions & 8 deletions)
@@ -203,17 +203,19 @@ def create_table(self,
             TableInput=table_input)

     def add_partitions(self, database, table, partition_paths, file_format,
-                       extra_args):
+                       compression, extra_args):
         if not partition_paths:
             return None
         partitions = list()
         for partition in partition_paths:
             if file_format == "parquet":
                 partition_def = Glue.parquet_partition_definition(
-                    partition=partition)
+                    partition=partition, compression=compression)
             elif file_format == "csv":
                 partition_def = Glue.csv_partition_definition(
-                    partition=partition, extra_args=extra_args)
+                    partition=partition,
+                    compression=compression,
+                    extra_args=extra_args)
             else:
                 raise UnsupportedFileFormat(file_format)
             partitions.append(partition_def)
@@ -225,8 +227,12 @@ def add_partitions(self, database, table, partition_paths, file_format,
                 DatabaseName=database,
                 TableName=table,
                 PartitionInputList=page)
-            if len(res["Errors"]) > 0:
-                raise ApiError(f"{res['Errors'][0]}")
+            for error in res["Errors"]:
+                if "ErrorDetail" in error:
+                    if "ErrorCode" in error["ErrorDetail"]:
+                        if error["ErrorDetail"][
+                                "ErrorCode"] != "AlreadyExistsException":
+                            raise ApiError(f"{error}")

     def get_connection_details(self, name):
         return self._client_glue.get_connection(
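Note on the hunk above: Glue's batch_create_partition reports per-partition failures in an "Errors" list rather than raising, and re-adding a partition that is already registered comes back as AlreadyExistsException; the rewritten loop ignores exactly that case, so add_partitions can be re-run safely. A minimal standalone sketch of the filtering (the response dict is a hypothetical example shaped like the documented BatchCreatePartition output; RuntimeError stands in for awswrangler's ApiError):

    # Hypothetical BatchCreatePartition response, for illustration only.
    res = {
        "Errors": [{
            "PartitionValues": ["2019-09-20"],
            "ErrorDetail": {
                "ErrorCode": "AlreadyExistsException",
                "ErrorMessage": "Partition already exists.",
            },
        }]
    }
    for error in res["Errors"]:
        # Tolerate partitions that already exist; surface anything else.
        code = error.get("ErrorDetail", {}).get("ErrorCode")
        if code != "AlreadyExistsException":
            raise RuntimeError(f"{error}")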
@@ -355,7 +361,7 @@ def csv_table_definition(table, partition_cols_schema, schema, path,
             "InputFormat": "org.apache.hadoop.mapred.TextInputFormat",
             "OutputFormat":
             "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
-            "Compressed": True,
+            "Compressed": compressed,
             "NumberOfBuckets": -1,
             "SerdeInfo": {
                 "Parameters": param,
@@ -375,7 +381,8 @@ def csv_partition_definition(partition, extra_args):
         }

     @staticmethod
-    def csv_partition_definition(partition, extra_args):
+    def csv_partition_definition(partition, compression, extra_args):
+        compressed = False if compression is None else True
         sep = extra_args["sep"] if "sep" in extra_args else ","
         serde = extra_args.get("serde")
         if serde == "OpenCSVSerDe":
@@ -394,6 +401,7 @@ def csv_partition_definition(partition, extra_args):
         "StorageDescriptor": {
             "InputFormat": "org.apache.hadoop.mapred.TextInputFormat",
             "Location": partition[0],
+            "Compressed": compressed,
             "SerdeInfo": {
                 "Parameters": param,
                 "SerializationLibrary": serde_fullname,
@@ -454,11 +462,13 @@ def parquet_table_definition(table, partition_cols_schema, schema, path,
         }

     @staticmethod
-    def parquet_partition_definition(partition):
+    def parquet_partition_definition(partition, compression):
+        compressed = False if compression is None else True
         return {
             "StorageDescriptor": {
                 "InputFormat": "org.apache.hadoop.mapred.TextInputFormat",
                 "Location": partition[0],
+                "Compressed": compressed,
                 "SerdeInfo": {
                     "Parameters": {
                         "serialization.format": "1"
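Across these hunks the partition (and table) definition builders derive Glue's boolean "Compressed" storage-descriptor flag from the user-facing compression argument the same way: any non-None value counts as compressed. A minimal sketch of that convention, with the helper name invented for illustration:

    def _is_compressed(compression):
        # None means plain files; any codec name ("gzip", "snappy", ...)
        # marks the partition or table as compressed.
        return False if compression is None else True

    assert _is_compressed(None) is False
    assert _is_compressed("gzip") is True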
awswrangler/pandas.py (18 changes: 15 additions & 3 deletions)
@@ -433,10 +433,22 @@ def read_sql_athena(self,
                 parse_dates=parse_timestamps,
                 quoting=csv.QUOTE_ALL,
                 max_result_size=max_result_size)
-        if len(ret.index) > 0:
-            for col in parse_dates:
-                ret[col] = ret[col].dt.date
-        return ret
+        if max_result_size is None:
+            if len(ret.index) > 0:
+                for col in parse_dates:
+                    ret[col] = ret[col].dt.date
+            return ret
+        else:
+            return Pandas._apply_dates_to_generator(
+                generator=ret, parse_dates=parse_dates)
+
+    @staticmethod
+    def _apply_dates_to_generator(generator, parse_dates):
+        for df in generator:
+            if len(df.index) > 0:
+                for col in parse_dates:
+                    df[col] = df[col].dt.date
+            yield df

     def to_csv(
         self,
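With max_result_size set, read_sql_athena yields DataFrame chunks from a generator instead of returning a single DataFrame, so the timestamp-to-date coercion has to happen lazily as each chunk is consumed; that is what the new _apply_dates_to_generator helper does. A standalone sketch of the pattern (column name and data are made up for illustration):

    import pandas as pd

    def apply_dates(generator, parse_dates):
        for df in generator:
            if len(df.index) > 0:
                for col in parse_dates:
                    df[col] = df[col].dt.date  # datetime64[ns] -> datetime.date
            yield df

    chunks = (pd.DataFrame({"day": pd.to_datetime(["2019-09-20"])})
              for _ in range(2))
    for chunk in apply_dates(chunks, parse_dates=["day"]):
        print(type(chunk["day"].iloc[0]))  # <class 'datetime.date'>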
building/Dockerfile (2 changes: 1 addition & 1 deletion)
@@ -1,4 +1,4 @@
-FROM lambci/lambda:build-python3.7
+FROM lambci/lambda:build-python3.6

 RUN pip install --upgrade pip

testing/test_awswrangler/test_pandas.py (4 changes: 0 additions & 4 deletions)
@@ -241,8 +241,6 @@ def test_to_s3(
             list(dataframe2.columns))
     else:
         assert len(list(dataframe.columns)) == len(list(dataframe2.columns))
-    assert dataframe[dataframe["id"] == 0].iloc[0]["name"] == dataframe2[
-        dataframe2["id"] == 0].iloc[0]["name"]


 def test_to_parquet_with_cast(
@@ -594,8 +592,6 @@ def test_to_csv_with_sep(
     sleep(2)
     assert len(dataframe.index) == len(dataframe2.index)
     assert len(list(dataframe.columns)) == len(list(dataframe2.columns))
-    assert dataframe[dataframe["id"] == 0].iloc[0]["name"] == dataframe2[
-        dataframe2["id"] == 0].iloc[0]["name"]


 def test_to_csv_serde_exception(
