Skip to content

Commit

Permalink
Bumping version to 0.0.25
Browse files Browse the repository at this point in the history
  • Loading branch information
igorborgest committed Dec 7, 2019
1 parent 0776acf commit 5e3f89f
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 25 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

> Utility belt to handle data on AWS.
[![Release](https://img.shields.io/badge/release-0.0.24-brightgreen.svg)](https://pypi.org/project/awswrangler/)
[![Release](https://img.shields.io/badge/release-0.0.25-brightgreen.svg)](https://pypi.org/project/awswrangler/)
[![Downloads](https://img.shields.io/pypi/dm/awswrangler.svg)](https://pypi.org/project/awswrangler/)
[![Python Version](https://img.shields.io/badge/python-3.6%20%7C%203.7-brightgreen.svg)](https://pypi.org/project/awswrangler/)
[![Documentation Status](https://readthedocs.org/projects/aws-data-wrangler/badge/?version=latest)](https://aws-data-wrangler.readthedocs.io/en/latest/?badge=latest)
Expand Down
2 changes: 1 addition & 1 deletion awswrangler/__version__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__title__ = "awswrangler"
__description__ = "Utility belt to handle data on AWS."
__version__ = "0.0.24"
__version__ = "0.0.25"
__license__ = "Apache License 2.0"
44 changes: 24 additions & 20 deletions awswrangler/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import pyarrow as pa # type: ignore
from pyarrow import parquet as pq # type: ignore
import tenacity # type: ignore
from s3fs import S3FileSystem # type: ignore

from awswrangler import data_types
from awswrangler.exceptions import (UnsupportedWriteMode, UnsupportedFileFormat, AthenaQueryError, EmptyS3Object,
Expand Down Expand Up @@ -491,13 +492,13 @@ def _get_query_dtype(self, query_execution_id: str) -> Tuple[Dict[str, str], Lis
return dtype, parse_timestamps, parse_dates, converters

def read_sql_athena(self,
sql,
database=None,
s3_output=None,
max_result_size=None,
workgroup=None,
encryption=None,
kms_key=None):
sql: str,
database: Optional[str] = None,
s3_output: Optional[str] = None,
max_result_size: Optional[int] = None,
workgroup: Optional[str] = None,
encryption: Optional[str] = None,
kms_key: Optional[str] = None):
"""
Executes any SQL query on AWS Athena and returns a Dataframe of the result.
P.S. If max_result_size is passed, then an iterator of Dataframes is returned.
Expand All @@ -512,18 +513,21 @@ def read_sql_athena(self,
:param kms_key: For SSE-KMS and CSE-KMS, this is the KMS key ARN or ID.
:return: Pandas Dataframe or Iterator of Pandas Dataframes if max_result_size != None
"""
if not s3_output:
s3_output = self._session.athena.create_athena_bucket()
query_execution_id = self._session.athena.run_query(query=sql,
database=database,
s3_output=s3_output,
workgroup=workgroup,
encryption=encryption,
kms_key=kms_key)
query_response = self._session.athena.wait_query(query_execution_id=query_execution_id)
if s3_output is None:
if self._session.athena_s3_output is not None:
s3_output = self._session.athena_s3_output
else:
s3_output = self._session.athena.create_athena_bucket()
query_execution_id: str = self._session.athena.run_query(query=sql,
database=database,
s3_output=s3_output,
workgroup=workgroup,
encryption=encryption,
kms_key=kms_key)
query_response: Dict = self._session.athena.wait_query(query_execution_id=query_execution_id)
if query_response["QueryExecution"]["Status"]["State"] in ["FAILED", "CANCELLED"]:
reason = query_response["QueryExecution"]["Status"]["StateChangeReason"]
message_error = f"Query error: {reason}"
reason: str = query_response["QueryExecution"]["Status"]["StateChangeReason"]
message_error: str = f"Query error: {reason}"
raise AthenaQueryError(message_error)
else:
dtype, parse_timestamps, parse_dates, converters = self._get_query_dtype(
Expand Down Expand Up @@ -1133,7 +1137,7 @@ def read_parquet(self,
path: str,
columns: Optional[List[str]] = None,
filters: Optional[Union[List[Tuple[Any]], List[Tuple[Any]]]] = None,
procs_cpu_bound: Optional[int] = None):
procs_cpu_bound: Optional[int] = None) -> pd.DataFrame:
"""
Read parquet data from S3
Expand All @@ -1145,7 +1149,7 @@ def read_parquet(self,
path = path[:-1] if path[-1] == "/" else path
procs_cpu_bound = 1 if self._session.procs_cpu_bound is None else self._session.procs_cpu_bound if procs_cpu_bound is None else procs_cpu_bound
use_threads: bool = True if procs_cpu_bound > 1 else False
fs = s3.get_fs(session_primitives=self._session.primitives)
fs: S3FileSystem = s3.get_fs(session_primitives=self._session.primitives)
fs = pa.filesystem._ensure_filesystem(fs)
return pq.read_table(source=path, columns=columns, filters=filters,
filesystem=fs).to_pandas(use_threads=use_threads)
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ yapf~=0.29.0
mypy~=0.750
flake8~=3.7.9
pytest-cov~=2.8.1
cfn-lint~=0.25.7
cfn-lint~=0.26.0
twine~=3.1.1
wheel~=0.33.6
sphinx~=2.2.2
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
numpy~=1.17.4
pandas~=0.25.3
pyarrow~=0.15.1
botocore~=1.13.30
boto3~=1.10.30
botocore~=1.13.34
boto3~=1.10.34
s3fs~=0.4.0
tenacity~=6.0.0
pg8000~=1.13.2

0 comments on commit 5e3f89f

Please sign in to comment.