Commit
Bumping version to 0.0.9
igorborgest committed Oct 7, 2019
1 parent 9c6d601 commit 6fcb149
Showing 10 changed files with 91 additions and 94 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -2,7 +2,7 @@

> Utility belt to handle data on AWS.
[![Release](https://img.shields.io/badge/release-0.0.8-brightgreen.svg)](https://pypi.org/project/awswrangler/)
[![Release](https://img.shields.io/badge/release-0.0.9-brightgreen.svg)](https://pypi.org/project/awswrangler/)
[![Release](https://img.shields.io/pypi/dm/awswrangler.svg)](https://pypi.org/project/awswrangler/)
[![Python Version](https://img.shields.io/badge/python-3.6%20%7C%203.7-brightgreen.svg)](https://pypi.org/project/awswrangler/)
[![Documentation Status](https://readthedocs.org/projects/aws-data-wrangler/badge/?version=latest)](https://aws-data-wrangler.readthedocs.io/en/latest/?badge=latest)
2 changes: 1 addition & 1 deletion awswrangler/__version__.py
@@ -1,4 +1,4 @@
__title__ = "awswrangler"
__description__ = "Utility belt to handle data on AWS."
__version__ = "0.0.8"
__version__ = "0.0.9"
__license__ = "Apache License 2.0"
59 changes: 29 additions & 30 deletions awswrangler/pandas.py
@@ -6,9 +6,9 @@
import csv
from datetime import datetime

import pandas
import pyarrow
from pyarrow import parquet
import pandas as pd
import pyarrow as pa
from pyarrow import parquet as pq

from awswrangler import data_types
from awswrangler.exceptions import (UnsupportedWriteMode,
@@ -239,21 +239,20 @@ def _read_csv_iterator(
lineterminator=lineterminator)
forgotten_bytes = len(body[last_char:])

df = pandas.read_csv(
StringIO(body[:last_char].decode("utf-8")),
header=header,
names=names,
usecols=usecols,
sep=sep,
quotechar=quotechar,
quoting=quoting,
escapechar=escapechar,
parse_dates=parse_dates,
infer_datetime_format=infer_datetime_format,
lineterminator=lineterminator,
dtype=dtype,
encoding=encoding,
converters=converters)
df = pd.read_csv(StringIO(body[:last_char].decode("utf-8")),
header=header,
names=names,
usecols=usecols,
sep=sep,
quotechar=quotechar,
quoting=quoting,
escapechar=escapechar,
parse_dates=parse_dates,
infer_datetime_format=infer_datetime_format,
lineterminator=lineterminator,
dtype=dtype,
encoding=encoding,
converters=converters)
yield df
if count == 1: # first chunk
names = df.columns
@@ -402,7 +401,7 @@ def _read_csv_once(
Key=key_path,
Fileobj=buff)
buff.seek(0),
dataframe = pandas.read_csv(
dataframe = pd.read_csv(
buff,
header=header,
names=names,
@@ -822,7 +821,7 @@ def _data_to_s3_object_writer(dataframe,
extra_args=None,
isolated_dataframe=False):
fs = s3.get_fs(session_primitives=session_primitives)
fs = pyarrow.filesystem._ensure_filesystem(fs)
fs = pa.filesystem._ensure_filesystem(fs)
s3.mkdir_if_not_exists(fs, path)

if compression is None:
@@ -834,7 +833,7 @@
else:
raise InvalidCompression(compression)

guid = pyarrow.compat.guid()
guid = pa.compat.guid()
if file_format == "parquet":
outfile = f"{guid}.parquet{compression_end}"
elif file_format == "csv":
@@ -905,9 +904,9 @@ def write_parquet_dataframe(dataframe, path, preserve_index, compression,
logger.debug(f"Casting column {name} Int64 to float64")

# Converting Pandas Dataframe to Pyarrow's Table
table = pyarrow.Table.from_pandas(df=dataframe,
preserve_index=preserve_index,
safe=False)
table = pa.Table.from_pandas(df=dataframe,
preserve_index=preserve_index,
safe=False)

# Casting on Pyarrow
if cast_columns:
@@ -923,11 +922,11 @@

# Persisting on S3
with fs.open(path, "wb") as f:
parquet.write_table(table,
f,
compression=compression,
coerce_timestamps="ms",
flavor="spark")
pq.write_table(table,
f,
compression=compression,
coerce_timestamps="ms",
flavor="spark")

# Casting back on Pandas if necessary
if isolated_dataframe is False:
Expand Down Expand Up @@ -1047,7 +1046,7 @@ def read_log_query(self,
col_name = col["field"]
new_row[col_name] = col["value"]
pre_df.append(new_row)
return pandas.DataFrame(pre_df)
return pd.DataFrame(pre_df)

@staticmethod
def normalize_columns_names_athena(dataframe, inplace=True):
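The pandas.py hunks above only switch the module to the conventional pd/pa/pq aliases; the Parquet-writing flow itself is unchanged. A minimal, self-contained sketch of that flow, assuming a local output file and made-up columns (awswrangler itself writes through an S3 filesystem handle):

# Sketch of the DataFrame -> Arrow Table -> Parquet flow from write_parquet_dataframe,
# using the pd/pa/pq aliases adopted in this commit. Path and columns are illustrative.
import pandas as pd
import pyarrow as pa
from pyarrow import parquet as pq

df = pd.DataFrame({"id": [1, 2, 3], "value": [0.1, 0.2, 0.3]})

# Convert to an Arrow table (safe=False mirrors the call shown in the diff)
table = pa.Table.from_pandas(df=df, preserve_index=False, safe=False)

# Persist as Parquet with the same writer options the diff keeps
with open("example.parquet", "wb") as f:
    pq.write_table(table,
                   f,
                   compression="snappy",
                   coerce_timestamps="ms",
                   flavor="spark")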
4 changes: 2 additions & 2 deletions awswrangler/spark.py
@@ -1,6 +1,6 @@
import logging

import pandas
import pandas as pd

from pyspark.sql.functions import pandas_udf, PandasUDFType, spark_partition_id
from pyspark.sql.types import TimestampType
@@ -107,7 +107,7 @@ def write(pandas_dataframe):
mode="append",
procs_cpu_bound=1,
cast_columns=casts)
return pandas.DataFrame.from_dict({"objects_paths": paths})
return pd.DataFrame.from_dict({"objects_paths": paths})

df_objects_paths = dataframe.repartition(numPartitions=num_partitions) \
.withColumn("aws_data_wrangler_internal_partition_id", spark_partition_id()) \
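The spark.py change keeps the same grouped-map pandas UDF pattern and simply returns pd.DataFrame.from_dict under the new alias. A hedged sketch of that pattern against Spark 2.4 (the schema and column names here are assumptions for illustration, not awswrangler's API):

# Grouped-map pandas UDF sketch: each group is handed to the UDF as a pandas
# DataFrame, and the UDF returns a pandas DataFrame matching the declared
# schema - the same shape as the write() helper in the diff above.
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.functions import pandas_udf, PandasUDFType, spark_partition_id

spark = SparkSession.builder.master("local[1]").getOrCreate()
sdf = spark.createDataFrame(pd.DataFrame({"id": [1, 1, 2], "v": [1.0, 2.0, 3.0]}))
sdf = sdf.withColumn("pid", spark_partition_id())

@pandas_udf("pid long, n long", PandasUDFType.GROUPED_MAP)
def summarize(pdf):
    # One pandas DataFrame in, one pandas DataFrame out
    return pd.DataFrame.from_dict({"pid": [int(pdf["pid"].iloc[0])], "n": [len(pdf)]})

sdf.groupby("pid").apply(summarize).show()
spark.stop()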
16 changes: 8 additions & 8 deletions requirements-dev.txt
@@ -1,8 +1,8 @@
yapf>=0.28.0
flake8>=3.7.8
pytest>=5.1.0
cfn-lint>=0.23.3
twine>=1.13.0
pyspark>=2.4.3
wheel>=0.33.6
sphinx>=2.1.2
yapf~=0.28.0
flake8~=3.7.8
pytest~=5.1.0
cfn-lint~=0.23.3
twine~=1.13.0
pyspark~=2.4.3
wheel~=0.33.6
sphinx~=2.1.2
14 changes: 7 additions & 7 deletions requirements.txt
@@ -1,7 +1,7 @@
botocore>=1.12.238
boto3>=1.9.238
pandas>=0.25.1
s3fs>=0.3.4
pyarrow>=0.14.1
tenacity>=5.1.1
pg8000>=1.13.2
botocore~=1.12.239
boto3~=1.9.239
pandas~=0.25.1
s3fs~=0.3.4
pyarrow~=0.14.0
tenacity~=5.1.1
pg8000~=1.13.2
14 changes: 7 additions & 7 deletions setup.py
@@ -22,12 +22,12 @@
exclude=["tests"]),
python_requires=">=3.6",
install_requires=[
"pyarrow>=0.14.0",
"pandas>=0.25.1",
"botocore>=1.12.239",
"boto3>=1.9.239",
"s3fs>=0.3.4",
"tenacity>=5.1.1",
"pg8000>=1.13.2",
"pyarrow~=0.14.0",
"pandas~=0.25.1",
"botocore~=1.12.239",
"boto3~=1.9.239",
"s3fs~=0.3.4",
"tenacity~=5.1.1",
"pg8000~=1.13.2",
],
)
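requirements.txt, requirements-dev.txt, and setup.py all move from minimum-version pins (>=) to compatible-release pins (~=). A small illustration of what a ~= specifier accepts, checked here with the packaging library (assumed available; pip vendors it):

# "pandas~=0.25.1" is shorthand for ">=0.25.1, ==0.25.*": newer patch releases
# are accepted, the next minor release is not.
from packaging.specifiers import SpecifierSet

spec = SpecifierSet("~=0.25.1")
print("0.25.3" in spec)  # True  - patch upgrade allowed
print("0.26.0" in spec)  # False - minor upgrade excluded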
8 changes: 3 additions & 5 deletions testing/Dockerfile
@@ -1,6 +1,6 @@
FROM openjdk:8-jre-stretch

ARG SPARK_VERSION=2.4.3
ARG SPARK_VERSION=2.4.4

RUN apt-get update -y
RUN apt-get install -y jq make build-essential libssl-dev zlib1g-dev libbz2-dev \
@@ -22,13 +22,11 @@ RUN eval "$(pyenv init -)" && \
curl --url "http://central.maven.org/maven2/org/apache/hadoop/hadoop-aws/2.7.3/hadoop-aws-2.7.3.jar" --output ${SPARK_HOME}/jars/hadoop-aws-2.7.3.jar && \
mkdir -p ${SPARK_HOME}/conf && \
echo spark.hadoop.fs.s3.impl=org.apache.hadoop.fs.s3a.S3AFileSystem >> ${SPARK_HOME}/conf/spark-defaults.conf
RUN $PIP install aws-sam-cli
RUN $PIP install awscli
ADD requirements.txt /root/
RUN $PIP install -r /root/requirements.txt
RUN $PIP install --upgrade -r /root/requirements.txt
RUN rm -rf /root/requirements.txt
ADD requirements-dev.txt /root/
RUN $PIP install -r /root/requirements-dev.txt
RUN $PIP install --upgrade -r /root/requirements-dev.txt
RUN rm -rf /root/requirements-dev.txt

ENTRYPOINT ["/bin/sh"]
4 changes: 2 additions & 2 deletions testing/test_awswrangler/test_glue.py
@@ -2,7 +2,7 @@

import pytest
import boto3
import pandas
import pandas as pd

from awswrangler import Session

@@ -53,7 +53,7 @@ def table(
bucket,
database,
):
dataframe = pandas.read_csv("data_samples/micro.csv")
dataframe = pd.read_csv("data_samples/micro.csv")
path = f"s3://{bucket}/test/"
table = "test"
session.pandas.to_parquet(dataframe=dataframe,