From d29125265a627431562b0d7a51919dfb294d84b1 Mon Sep 17 00:00:00 2001 From: Leon Luttenberger Date: Tue, 5 Mar 2024 10:00:52 -0600 Subject: [PATCH] chore: Prepare 3.7.0 release (#2703) --- .bumpversion.toml | 2 +- README.md | 80 +++++++++---------- VERSION | 2 +- awswrangler/__metadata__.py | 2 +- awswrangler/athena/_read.py | 16 ++-- awswrangler/catalog/_create.py | 4 +- awswrangler/s3/_read_orc.py | 4 +- awswrangler/s3/_read_parquet.py | 4 +- awswrangler/s3/_read_text.py | 6 +- awswrangler/s3/_write_orc.py | 4 +- awswrangler/s3/_write_parquet.py | 6 +- awswrangler/s3/_write_text.py | 12 +-- docs/source/install.rst | 2 +- pyproject.toml | 2 +- test_infra/pyproject.toml | 2 +- tests/unit/test_metadata.py | 2 +- tutorials/001 - Introduction.ipynb | 20 ++--- ...ySQL, PostgreSQL, SQL Server, Oracle.ipynb | 30 +++---- tutorials/014 - Schema Evolution.ipynb | 6 +- tutorials/021 - Global Configurations.ipynb | 2 +- ...22 - Writing Partitions Concurrently.ipynb | 2 +- .../023 - Flexible Partitions Filter.ipynb | 2 +- tutorials/030 - Data Api.ipynb | 8 +- 23 files changed, 110 insertions(+), 110 deletions(-) diff --git a/.bumpversion.toml b/.bumpversion.toml index c0accce44..dad81b327 100644 --- a/.bumpversion.toml +++ b/.bumpversion.toml @@ -1,5 +1,5 @@ [tool.bumpversion] -current_version = "3.7.0b1" +current_version = "3.7.0" commit = false tag = false tag_name = "{new_version}" diff --git a/README.md b/README.md index 5f077eb31..86e4fca95 100644 --- a/README.md +++ b/README.md @@ -100,27 +100,27 @@ FROM "sampleDB"."sampleTable" ORDER BY time DESC LIMIT 3 ## At scale AWS SDK for pandas can also run your workflows at scale by leveraging [Modin](https://modin.readthedocs.io/en/stable/) and [Ray](https://www.ray.io/). Both projects aim to speed up data workloads by distributing processing over a cluster of workers. -The quickest way to get started is to use AWS Glue with Ray. Read our [docs](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/scale.html), our blogs ([1](https://aws.amazon.com/blogs/big-data/scale-aws-sdk-for-pandas-workloads-with-aws-glue-for-ray/)/[2](https://aws.amazon.com/blogs/big-data/advanced-patterns-with-aws-sdk-for-pandas-on-aws-glue-for-ray/)), or head to our latest [tutorials](https://github.com/aws/aws-sdk-pandas/tree/main/tutorials) to discover even more features. +The quickest way to get started is to use AWS Glue with Ray. Read our [docs](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/scale.html), our blogs ([1](https://aws.amazon.com/blogs/big-data/scale-aws-sdk-for-pandas-workloads-with-aws-glue-for-ray/)/[2](https://aws.amazon.com/blogs/big-data/advanced-patterns-with-aws-sdk-for-pandas-on-aws-glue-for-ray/)), or head to our latest [tutorials](https://github.com/aws/aws-sdk-pandas/tree/main/tutorials) to discover even more features. > ⚠️ **Ray is currently not available for Python 3.12. While AWS SDK for pandas supports Python 3.12, it cannot be used at scale.** ## [Read The Docs](https://aws-sdk-pandas.readthedocs.io/) -- [**What is AWS SDK for pandas?**](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/about.html) -- [**Install**](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/install.html) - - [PyPi (pip)](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/install.html#pypi-pip) - - [Conda](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/install.html#conda) - - [AWS Lambda Layer](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/install.html#aws-lambda-layer) - - [AWS Glue Python Shell Jobs](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/install.html#aws-glue-python-shell-jobs) - - [AWS Glue PySpark Jobs](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/install.html#aws-glue-pyspark-jobs) - - [Amazon SageMaker Notebook](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/install.html#amazon-sagemaker-notebook) - - [Amazon SageMaker Notebook Lifecycle](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/install.html#amazon-sagemaker-notebook-lifecycle) - - [EMR](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/install.html#emr) - - [From source](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/install.html#from-source) -- [**At scale**](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/scale.html) - - [Getting Started](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/scale.html#getting-started) - - [Supported APIs](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/scale.html#supported-apis) - - [Resources](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/scale.html#resources) +- [**What is AWS SDK for pandas?**](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/about.html) +- [**Install**](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html) + - [PyPi (pip)](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#pypi-pip) + - [Conda](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#conda) + - [AWS Lambda Layer](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#aws-lambda-layer) + - [AWS Glue Python Shell Jobs](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#aws-glue-python-shell-jobs) + - [AWS Glue PySpark Jobs](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#aws-glue-pyspark-jobs) + - [Amazon SageMaker Notebook](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#amazon-sagemaker-notebook) + - [Amazon SageMaker Notebook Lifecycle](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#amazon-sagemaker-notebook-lifecycle) + - [EMR](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#emr) + - [From source](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#from-source) +- [**At scale**](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/scale.html) + - [Getting Started](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/scale.html#getting-started) + - [Supported APIs](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/scale.html#supported-apis) + - [Resources](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/scale.html#resources) - [**Tutorials**](https://github.com/aws/aws-sdk-pandas/tree/main/tutorials) - [001 - Introduction](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/001%20-%20Introduction.ipynb) - [002 - Sessions](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/002%20-%20Sessions.ipynb) @@ -162,30 +162,30 @@ The quickest way to get started is to use AWS Glue with Ray. Read our [docs](htt - [039 - Athena Iceberg](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/039%20-%20Athena%20Iceberg.ipynb) - [040 - EMR Serverless](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/040%20-%20EMR%20Serverless.ipynb) - [041 - Apache Spark on Amazon Athena](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/041%20-%20Apache%20Spark%20on%20Amazon%20Athena.ipynb) -- [**API Reference**](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html) - - [Amazon S3](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html#amazon-s3) - - [AWS Glue Catalog](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html#aws-glue-catalog) - - [Amazon Athena](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html#amazon-athena) - - [Amazon Redshift](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html#amazon-redshift) - - [PostgreSQL](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html#postgresql) - - [MySQL](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html#mysql) - - [SQL Server](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html#sqlserver) - - [Oracle](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html#oracle) - - [Data API Redshift](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html#data-api-redshift) - - [Data API RDS](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html#data-api-rds) - - [OpenSearch](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html#opensearch) - - [AWS Glue Data Quality](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html#aws-glue-data-quality) - - [Amazon Neptune](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html#amazon-neptune) - - [DynamoDB](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html#dynamodb) - - [Amazon Timestream](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html#amazon-timestream) - - [Amazon EMR](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html#amazon-emr) - - [Amazon CloudWatch Logs](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html#amazon-cloudwatch-logs) - - [Amazon Chime](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html#amazon-chime) - - [Amazon QuickSight](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html#amazon-quicksight) - - [AWS STS](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html#aws-sts) - - [AWS Secrets Manager](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html#aws-secrets-manager) - - [Global Configurations](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html#global-configurations) - - [Distributed - Ray](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html#distributed-ray) +- [**API Reference**](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html) + - [Amazon S3](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#amazon-s3) + - [AWS Glue Catalog](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#aws-glue-catalog) + - [Amazon Athena](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#amazon-athena) + - [Amazon Redshift](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#amazon-redshift) + - [PostgreSQL](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#postgresql) + - [MySQL](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#mysql) + - [SQL Server](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#sqlserver) + - [Oracle](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#oracle) + - [Data API Redshift](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#data-api-redshift) + - [Data API RDS](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#data-api-rds) + - [OpenSearch](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#opensearch) + - [AWS Glue Data Quality](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#aws-glue-data-quality) + - [Amazon Neptune](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#amazon-neptune) + - [DynamoDB](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#dynamodb) + - [Amazon Timestream](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#amazon-timestream) + - [Amazon EMR](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#amazon-emr) + - [Amazon CloudWatch Logs](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#amazon-cloudwatch-logs) + - [Amazon Chime](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#amazon-chime) + - [Amazon QuickSight](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#amazon-quicksight) + - [AWS STS](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#aws-sts) + - [AWS Secrets Manager](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#aws-secrets-manager) + - [Global Configurations](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#global-configurations) + - [Distributed - Ray](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#distributed-ray) - [**License**](https://github.com/aws/aws-sdk-pandas/blob/main/LICENSE.txt) - [**Contributing**](https://github.com/aws/aws-sdk-pandas/blob/main/CONTRIBUTING.md) diff --git a/VERSION b/VERSION index 8c073e4f3..240bba906 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.7.0b1 \ No newline at end of file +3.7.0 \ No newline at end of file diff --git a/awswrangler/__metadata__.py b/awswrangler/__metadata__.py index ad57b6df4..423ed200d 100644 --- a/awswrangler/__metadata__.py +++ b/awswrangler/__metadata__.py @@ -7,5 +7,5 @@ __title__: str = "awswrangler" __description__: str = "Pandas on AWS." -__version__: str = "3.7.0b1" +__version__: str = "3.7.0" __license__: str = "Apache License 2.0" diff --git a/awswrangler/athena/_read.py b/awswrangler/athena/_read.py index 21b502b4d..efe407745 100644 --- a/awswrangler/athena/_read.py +++ b/awswrangler/athena/_read.py @@ -792,11 +792,11 @@ def read_sql_query( **Related tutorial:** - - `Amazon Athena `_ - - `Athena Cache `_ - - `Global Configurations `_ **There are three approaches available through ctas_approach and unload_approach parameters:** @@ -860,7 +860,7 @@ def read_sql_query( /athena.html#Athena.Client.get_query_execution>`_ . For a practical example check out the - `related tutorial `_! @@ -1137,11 +1137,11 @@ def read_sql_table( **Related tutorial:** - - `Amazon Athena `_ - - `Athena Cache `_ - - `Global Configurations `_ **There are three approaches available through ctas_approach and unload_approach parameters:** @@ -1205,7 +1205,7 @@ def read_sql_table( /athena.html#Athena.Client.get_query_execution>`_ . For a practical example check out the - `related tutorial `_! diff --git a/awswrangler/catalog/_create.py b/awswrangler/catalog/_create.py index 524be0a4d..45175c6ed 100644 --- a/awswrangler/catalog/_create.py +++ b/awswrangler/catalog/_create.py @@ -1079,7 +1079,7 @@ def create_csv_table( If True allows schema evolution (new or missing columns), otherwise a exception will be raised. (Only considered if dataset=True and mode in ("append", "overwrite_partitions")) Related tutorial: - https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/tutorials/014%20-%20Schema%20Evolution.html + https://aws-sdk-pandas.readthedocs.io/en/3.7.0/tutorials/014%20-%20Schema%20Evolution.html sep : str String of length 1. Field delimiter for the output file. skip_header_line_count : Optional[int] @@ -1260,7 +1260,7 @@ def create_json_table( If True allows schema evolution (new or missing columns), otherwise a exception will be raised. (Only considered if dataset=True and mode in ("append", "overwrite_partitions")) Related tutorial: - https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/tutorials/014%20-%20Schema%20Evolution.html + https://aws-sdk-pandas.readthedocs.io/en/3.7.0/tutorials/014%20-%20Schema%20Evolution.html serde_library : Optional[str] Specifies the SerDe Serialization library which will be used. You need to provide the Class library name as a string. diff --git a/awswrangler/s3/_read_orc.py b/awswrangler/s3/_read_orc.py index 143e726d4..1561a8430 100644 --- a/awswrangler/s3/_read_orc.py +++ b/awswrangler/s3/_read_orc.py @@ -224,7 +224,7 @@ def read_orc( must return a bool, True to read the partition or False to ignore it. Ignored if `dataset=False`. E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False`` - https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/tutorials/023%20-%20Flexible%20Partitions%20Filter.html + https://aws-sdk-pandas.readthedocs.io/en/3.7.0/tutorials/023%20-%20Flexible%20Partitions%20Filter.html columns : List[str], optional List of columns to read from the file(s). validate_schema : bool, default False @@ -386,7 +386,7 @@ def read_orc_table( must return a bool, True to read the partition or False to ignore it. Ignored if `dataset=False`. E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False`` - https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/tutorials/023%20-%20Flexible%20Partitions%20Filter.html + https://aws-sdk-pandas.readthedocs.io/en/3.7.0/tutorials/023%20-%20Flexible%20Partitions%20Filter.html columns : List[str], optional List of columns to read from the file(s). validate_schema : bool, default False diff --git a/awswrangler/s3/_read_parquet.py b/awswrangler/s3/_read_parquet.py index 35d607e80..8dde5c236 100644 --- a/awswrangler/s3/_read_parquet.py +++ b/awswrangler/s3/_read_parquet.py @@ -397,7 +397,7 @@ def read_parquet( must return a bool, True to read the partition or False to ignore it. Ignored if `dataset=False`. E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False`` - https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/tutorials/023%20-%20Flexible%20Partitions%20Filter.html + https://aws-sdk-pandas.readthedocs.io/en/3.7.0/tutorials/023%20-%20Flexible%20Partitions%20Filter.html columns : List[str], optional List of columns to read from the file(s). validate_schema : bool, default False @@ -639,7 +639,7 @@ def read_parquet_table( must return a bool, True to read the partition or False to ignore it. Ignored if `dataset=False`. E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False`` - https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/tutorials/023%20-%20Flexible%20Partitions%20Filter.html + https://aws-sdk-pandas.readthedocs.io/en/3.7.0/tutorials/023%20-%20Flexible%20Partitions%20Filter.html columns : List[str], optional List of columns to read from the file(s). validate_schema : bool, default False diff --git a/awswrangler/s3/_read_text.py b/awswrangler/s3/_read_text.py index d78ef52c9..aea535af1 100644 --- a/awswrangler/s3/_read_text.py +++ b/awswrangler/s3/_read_text.py @@ -236,7 +236,7 @@ def read_csv( This function MUST return a bool, True to read the partition or False to ignore it. Ignored if `dataset=False`. E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False`` - https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/tutorials/023%20-%20Flexible%20Partitions%20Filter.html + https://aws-sdk-pandas.readthedocs.io/en/3.7.0/tutorials/023%20-%20Flexible%20Partitions%20Filter.html s3_additional_kwargs: dict[str, Any], optional Forwarded to botocore requests. ray_args: typing.RaySettings, optional @@ -398,7 +398,7 @@ def read_fwf( This function MUST return a bool, True to read the partition or False to ignore it. Ignored if `dataset=False`. E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False`` - https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/tutorials/023%20-%20Flexible%20Partitions%20Filter.html + https://aws-sdk-pandas.readthedocs.io/en/3.7.0/tutorials/023%20-%20Flexible%20Partitions%20Filter.html s3_additional_kwargs: dict[str, Any], optional Forwarded to botocore requests. ray_args: typing.RaySettings, optional @@ -567,7 +567,7 @@ def read_json( This function MUST return a bool, True to read the partition or False to ignore it. Ignored if `dataset=False`. E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False`` - https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/tutorials/023%20-%20Flexible%20Partitions%20Filter.html + https://aws-sdk-pandas.readthedocs.io/en/3.7.0/tutorials/023%20-%20Flexible%20Partitions%20Filter.html s3_additional_kwargs: dict[str, Any], optional Forwarded to botocore requests. ray_args: typing.RaySettings, optional diff --git a/awswrangler/s3/_write_orc.py b/awswrangler/s3/_write_orc.py index fd1d5cc23..e21ff27cb 100644 --- a/awswrangler/s3/_write_orc.py +++ b/awswrangler/s3/_write_orc.py @@ -405,7 +405,7 @@ def to_orc( concurrent_partitioning: bool If True will increase the parallelism level during the partitions writing. It will decrease the writing time and increase the memory usage. - https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html + https://aws-sdk-pandas.readthedocs.io/en/3.7.0/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html mode: str, optional ``append`` (Default), ``overwrite``, ``overwrite_partitions``. Only takes effect if dataset=True. catalog_versioning : bool @@ -414,7 +414,7 @@ def to_orc( If True allows schema evolution (new or missing columns), otherwise a exception will be raised. True by default. (Only considered if dataset=True and mode in ("append", "overwrite_partitions")) Related tutorial: - https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/tutorials/014%20-%20Schema%20Evolution.html + https://aws-sdk-pandas.readthedocs.io/en/3.7.0/tutorials/014%20-%20Schema%20Evolution.html database : str, optional Glue/Athena catalog: Database name. table : str, optional diff --git a/awswrangler/s3/_write_parquet.py b/awswrangler/s3/_write_parquet.py index e49412493..8d4849f93 100644 --- a/awswrangler/s3/_write_parquet.py +++ b/awswrangler/s3/_write_parquet.py @@ -433,18 +433,18 @@ def to_parquet( concurrent_partitioning: bool If True will increase the parallelism level during the partitions writing. It will decrease the writing time and increase the memory usage. - https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html + https://aws-sdk-pandas.readthedocs.io/en/3.7.0/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html mode: str, optional ``append`` (Default), ``overwrite``, ``overwrite_partitions``. Only takes effect if dataset=True. For details check the related tutorial: - https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/tutorials/004%20-%20Parquet%20Datasets.html + https://aws-sdk-pandas.readthedocs.io/en/3.7.0/tutorials/004%20-%20Parquet%20Datasets.html catalog_versioning : bool If True and `mode="overwrite"`, creates an archived version of the table catalog before updating it. schema_evolution : bool If True allows schema evolution (new or missing columns), otherwise a exception will be raised. True by default. (Only considered if dataset=True and mode in ("append", "overwrite_partitions")) Related tutorial: - https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/tutorials/014%20-%20Schema%20Evolution.html + https://aws-sdk-pandas.readthedocs.io/en/3.7.0/tutorials/014%20-%20Schema%20Evolution.html database : str, optional Glue/Athena catalog: Database name. table : str, optional diff --git a/awswrangler/s3/_write_text.py b/awswrangler/s3/_write_text.py index 1e9f1c8ae..4dc01677f 100644 --- a/awswrangler/s3/_write_text.py +++ b/awswrangler/s3/_write_text.py @@ -171,18 +171,18 @@ def to_csv( # noqa: PLR0912,PLR0915 concurrent_partitioning: bool If True will increase the parallelism level during the partitions writing. It will decrease the writing time and increase the memory usage. - https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html + https://aws-sdk-pandas.readthedocs.io/en/3.7.0/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html mode : str, optional ``append`` (Default), ``overwrite``, ``overwrite_partitions``. Only takes effect if dataset=True. For details check the related tutorial: - https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.s3.to_parquet.html#awswrangler.s3.to_parquet + https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.s3.to_parquet.html#awswrangler.s3.to_parquet catalog_versioning : bool If True and `mode="overwrite"`, creates an archived version of the table catalog before updating it. schema_evolution : bool If True allows schema evolution (new or missing columns), otherwise a exception will be raised. (Only considered if dataset=True and mode in ("append", "overwrite_partitions")). False by default. Related tutorial: - https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/tutorials/014%20-%20Schema%20Evolution.html + https://aws-sdk-pandas.readthedocs.io/en/3.7.0/tutorials/014%20-%20Schema%20Evolution.html database : str, optional Glue/Athena catalog: Database name. table : str, optional @@ -716,18 +716,18 @@ def to_json( # noqa: PLR0912,PLR0915 concurrent_partitioning: bool If True will increase the parallelism level during the partitions writing. It will decrease the writing time and increase the memory usage. - https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html + https://aws-sdk-pandas.readthedocs.io/en/3.7.0/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html mode : str, optional ``append`` (Default), ``overwrite``, ``overwrite_partitions``. Only takes effect if dataset=True. For details check the related tutorial: - https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.s3.to_parquet.html#awswrangler.s3.to_parquet + https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.s3.to_parquet.html#awswrangler.s3.to_parquet catalog_versioning : bool If True and `mode="overwrite"`, creates an archived version of the table catalog before updating it. schema_evolution : bool If True allows schema evolution (new or missing columns), otherwise a exception will be raised. (Only considered if dataset=True and mode in ("append", "overwrite_partitions")) Related tutorial: - https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/tutorials/014%20-%20Schema%20Evolution.html + https://aws-sdk-pandas.readthedocs.io/en/3.7.0/tutorials/014%20-%20Schema%20Evolution.html database : str, optional Glue/Athena catalog: Database name. table : str, optional diff --git a/docs/source/install.rst b/docs/source/install.rst index a3fac6a75..82b4e8051 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -215,7 +215,7 @@ Go to your Glue PySpark job and create a new *Job parameters* key/value: To install a specific version, set the value for the above Job parameter as follows: -* Value: ``pyarrow==7,pandas==1.5.3,awswrangler==3.7.0b1`` +* Value: ``pyarrow==7,pandas==1.5.3,awswrangler==3.7.0`` `Official Glue PySpark Reference `_ diff --git a/pyproject.toml b/pyproject.toml index a401d7843..1165ce0e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "awswrangler" -version = "3.7.0b1" +version = "3.7.0" description = "Pandas on AWS." authors = ["Amazon Web Services"] license = "Apache License 2.0" diff --git a/test_infra/pyproject.toml b/test_infra/pyproject.toml index af64959d0..27df1eae9 100644 --- a/test_infra/pyproject.toml +++ b/test_infra/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "awswrangler - test infrastructure" -version = "3.7.0b1" +version = "3.7.0" description = "CDK test infrastructure for AWS SDK for pandas" authors = ["Amazon Web Services"] license = "Apache License 2.0" diff --git a/tests/unit/test_metadata.py b/tests/unit/test_metadata.py index 27bc38f8d..c83804446 100644 --- a/tests/unit/test_metadata.py +++ b/tests/unit/test_metadata.py @@ -2,7 +2,7 @@ def test_metadata(): - assert wr.__version__ == "3.7.0b1" + assert wr.__version__ == "3.7.0" assert wr.__title__ == "awswrangler" assert wr.__description__ == "Pandas on AWS." assert wr.__license__ == "Apache License 2.0" diff --git a/tutorials/001 - Introduction.ipynb b/tutorials/001 - Introduction.ipynb index e26546e0b..b77edf105 100644 --- a/tutorials/001 - Introduction.ipynb +++ b/tutorials/001 - Introduction.ipynb @@ -20,7 +20,7 @@ "\n", "Built on top of other open-source projects like [Pandas](https://github.com/pandas-dev/pandas), [Apache Arrow](https://github.com/apache/arrow) and [Boto3](https://github.com/boto/boto3), it offers abstracted functions to execute usual ETL tasks like load/unload data from **Data Lakes**, **Data Warehouses** and **Databases**.\n", "\n", - "Check our [list of functionalities](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html)." + "Check our [list of functionalities](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html)." ] }, { @@ -32,15 +32,15 @@ "\n", "awswrangler runs almost anywhere over Python 3.8, 3.9 and 3.10, so there are several different ways to install it in the desired environment.\n", "\n", - " - [PyPi (pip)](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/install.html#pypi-pip)\n", - " - [Conda](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/install.html#conda)\n", - " - [AWS Lambda Layer](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/install.html#aws-lambda-layer)\n", - " - [AWS Glue Python Shell Jobs](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/install.html#aws-glue-python-shell-jobs)\n", - " - [AWS Glue PySpark Jobs](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/install.html#aws-glue-pyspark-jobs)\n", - " - [Amazon SageMaker Notebook](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/install.html#amazon-sagemaker-notebook)\n", - " - [Amazon SageMaker Notebook Lifecycle](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/install.html#amazon-sagemaker-notebook-lifecycle)\n", - " - [EMR Cluster](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/install.html#emr-cluster)\n", - " - [From source](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/install.html#from-source)\n", + " - [PyPi (pip)](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#pypi-pip)\n", + " - [Conda](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#conda)\n", + " - [AWS Lambda Layer](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#aws-lambda-layer)\n", + " - [AWS Glue Python Shell Jobs](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#aws-glue-python-shell-jobs)\n", + " - [AWS Glue PySpark Jobs](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#aws-glue-pyspark-jobs)\n", + " - [Amazon SageMaker Notebook](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#amazon-sagemaker-notebook)\n", + " - [Amazon SageMaker Notebook Lifecycle](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#amazon-sagemaker-notebook-lifecycle)\n", + " - [EMR Cluster](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#emr-cluster)\n", + " - [From source](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#from-source)\n", "\n", "Some good practices for most of the above methods are:\n", " - Use new and individual Virtual Environments for each project ([venv](https://docs.python.org/3/library/venv.html))\n", diff --git a/tutorials/007 - Redshift, MySQL, PostgreSQL, SQL Server, Oracle.ipynb b/tutorials/007 - Redshift, MySQL, PostgreSQL, SQL Server, Oracle.ipynb index f470c4520..2881aa6f4 100644 --- a/tutorials/007 - Redshift, MySQL, PostgreSQL, SQL Server, Oracle.ipynb +++ b/tutorials/007 - Redshift, MySQL, PostgreSQL, SQL Server, Oracle.ipynb @@ -10,16 +10,16 @@ "\n", "[awswrangler](https://github.com/aws/aws-sdk-pandas)'s Redshift, MySQL and PostgreSQL have two basic functions in common that try to follow Pandas conventions, but add more data type consistency.\n", "\n", - "- [wr.redshift.to_sql()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.redshift.to_sql.html)\n", - "- [wr.redshift.read_sql_query()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.redshift.read_sql_query.html)\n", - "- [wr.mysql.to_sql()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.mysql.to_sql.html)\n", - "- [wr.mysql.read_sql_query()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.mysql.read_sql_query.html)\n", - "- [wr.postgresql.to_sql()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.postgresql.to_sql.html)\n", - "- [wr.postgresql.read_sql_query()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.postgresql.read_sql_query.html)\n", - "- [wr.sqlserver.to_sql()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.sqlserver.to_sql.html)\n", - "- [wr.sqlserver.read_sql_query()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.sqlserver.read_sql_query.html)\n", - "- [wr.oracle.to_sql()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.oracle.to_sql.html)\n", - "- [wr.oracle.read_sql_query()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.oracle.read_sql_query.html)" + "- [wr.redshift.to_sql()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.redshift.to_sql.html)\n", + "- [wr.redshift.read_sql_query()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.redshift.read_sql_query.html)\n", + "- [wr.mysql.to_sql()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.mysql.to_sql.html)\n", + "- [wr.mysql.read_sql_query()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.mysql.read_sql_query.html)\n", + "- [wr.postgresql.to_sql()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.postgresql.to_sql.html)\n", + "- [wr.postgresql.read_sql_query()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.postgresql.read_sql_query.html)\n", + "- [wr.sqlserver.to_sql()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.sqlserver.to_sql.html)\n", + "- [wr.sqlserver.read_sql_query()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.sqlserver.read_sql_query.html)\n", + "- [wr.oracle.to_sql()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.oracle.to_sql.html)\n", + "- [wr.oracle.read_sql_query()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.oracle.read_sql_query.html)" ] }, { @@ -51,11 +51,11 @@ "source": [ "## Connect using the Glue Catalog Connections\n", "\n", - "- [wr.redshift.connect()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.redshift.connect.html)\n", - "- [wr.mysql.connect()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.mysql.connect.html)\n", - "- [wr.postgresql.connect()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.postgresql.connect.html)\n", - "- [wr.sqlserver.connect()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.sqlserver.connect.html)\n", - "- [wr.oracle.connect()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.oracle.connect.html)" + "- [wr.redshift.connect()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.redshift.connect.html)\n", + "- [wr.mysql.connect()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.mysql.connect.html)\n", + "- [wr.postgresql.connect()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.postgresql.connect.html)\n", + "- [wr.sqlserver.connect()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.sqlserver.connect.html)\n", + "- [wr.oracle.connect()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.oracle.connect.html)" ] }, { diff --git a/tutorials/014 - Schema Evolution.ipynb b/tutorials/014 - Schema Evolution.ipynb index 7616d95ab..b82d4c614 100644 --- a/tutorials/014 - Schema Evolution.ipynb +++ b/tutorials/014 - Schema Evolution.ipynb @@ -10,9 +10,9 @@ "\n", "awswrangler supports new **columns** on Parquet and CSV datasets through:\n", "\n", - "- [wr.s3.to_parquet()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.s3.to_parquet.html#awswrangler.s3.to_parquet)\n", - "- [wr.s3.store_parquet_metadata()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.s3.store_parquet_metadata.html#awswrangler.s3.store_parquet_metadata) i.e. \"Crawler\"\n", - "- [wr.s3.to_csv()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.s3.to_csv.html#awswrangler.s3.to_csv)" + "- [wr.s3.to_parquet()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.s3.to_parquet.html#awswrangler.s3.to_parquet)\n", + "- [wr.s3.store_parquet_metadata()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.s3.store_parquet_metadata.html#awswrangler.s3.store_parquet_metadata) i.e. \"Crawler\"\n", + "- [wr.s3.to_csv()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.s3.to_csv.html#awswrangler.s3.to_csv)" ] }, { diff --git a/tutorials/021 - Global Configurations.ipynb b/tutorials/021 - Global Configurations.ipynb index e773929f8..59e7275f6 100644 --- a/tutorials/021 - Global Configurations.ipynb +++ b/tutorials/021 - Global Configurations.ipynb @@ -13,7 +13,7 @@ "- **Environment variables**\n", "- **wr.config**\n", "\n", - "*P.S. Check the [function API doc](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html) to see if your function has some argument that can be configured through Global configurations.*\n", + "*P.S. Check the [function API doc](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html) to see if your function has some argument that can be configured through Global configurations.*\n", "\n", "*P.P.S. One exception to the above mentioned rules is the `botocore_config` property. It cannot be set through environment variables\n", "but only via `wr.config`. It will be used as the `botocore.config.Config` for all underlying `boto3` calls.\n", diff --git a/tutorials/022 - Writing Partitions Concurrently.ipynb b/tutorials/022 - Writing Partitions Concurrently.ipynb index 4e49d9a83..1fc758d50 100644 --- a/tutorials/022 - Writing Partitions Concurrently.ipynb +++ b/tutorials/022 - Writing Partitions Concurrently.ipynb @@ -13,7 +13,7 @@ " If True will increase the parallelism level during the partitions writing. It will decrease the\n", " writing time and increase memory usage.\n", "\n", - "*P.S. Check the [function API doc](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html) to see it has some argument that can be configured through Global configurations.*" + "*P.S. Check the [function API doc](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html) to see it has some argument that can be configured through Global configurations.*" ] }, { diff --git a/tutorials/023 - Flexible Partitions Filter.ipynb b/tutorials/023 - Flexible Partitions Filter.ipynb index ba8fc81ce..107568c60 100644 --- a/tutorials/023 - Flexible Partitions Filter.ipynb +++ b/tutorials/023 - Flexible Partitions Filter.ipynb @@ -16,7 +16,7 @@ " - Ignored if `dataset=False`.\n", " \n", "\n", - "*P.S. Check the [function API doc](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/api.html) to see it has some argument that can be configured through Global configurations.*" + "*P.S. Check the [function API doc](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html) to see it has some argument that can be configured through Global configurations.*" ] }, { diff --git a/tutorials/030 - Data Api.ipynb b/tutorials/030 - Data Api.ipynb index 0fed49e7f..6d91f4000 100644 --- a/tutorials/030 - Data Api.ipynb +++ b/tutorials/030 - Data Api.ipynb @@ -26,8 +26,8 @@ "metadata": {}, "source": [ "## Connect to the cluster\n", - "- [wr.data_api.redshift.connect()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.data_api.redshift.connect.html)\n", - "- [wr.data_api.rds.connect()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.data_api.rds.connect.html)" + "- [wr.data_api.redshift.connect()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.data_api.redshift.connect.html)\n", + "- [wr.data_api.rds.connect()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.data_api.rds.connect.html)" ] }, { @@ -60,8 +60,8 @@ "metadata": {}, "source": [ "## Read from database\n", - "- [wr.data_api.redshift.read_sql_query()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.data_api.redshift.read_sql_query.html)\n", - "- [wr.data_api.rds.read_sql_query()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0b1/stubs/awswrangler.data_api.rds.read_sql_query.html)" + "- [wr.data_api.redshift.read_sql_query()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.data_api.redshift.read_sql_query.html)\n", + "- [wr.data_api.rds.read_sql_query()](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/stubs/awswrangler.data_api.rds.read_sql_query.html)" ] }, {