From a4f92a8fb512868e5e151356bdb4bb872d18f0eb Mon Sep 17 00:00:00 2001 From: Zach Liu Date: Mon, 5 Aug 2024 18:43:13 -0400 Subject: [PATCH 1/3] Add data type to redshift query runner (#7109) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- redash/query_runner/pg.py | 3 ++- tests/query_runner/test_pg.py | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/redash/query_runner/pg.py b/redash/query_runner/pg.py index ca071bb627..c7ddef1eb7 100644 --- a/redash/query_runner/pg.py +++ b/redash/query_runner/pg.py @@ -388,12 +388,13 @@ def _get_tables(self, schema): SELECT DISTINCT table_name, table_schema, column_name, + data_type, ordinal_position AS pos FROM svv_columns WHERE table_schema NOT IN ('pg_internal','pg_catalog','information_schema') AND table_schema NOT LIKE 'pg_temp_%' ) - SELECT table_name, table_schema, column_name + SELECT table_name, table_schema, column_name, data_type FROM tables WHERE HAS_SCHEMA_PRIVILEGE(table_schema, 'USAGE') AND diff --git a/tests/query_runner/test_pg.py b/tests/query_runner/test_pg.py index 634547be04..e72001b0a9 100644 --- a/tests/query_runner/test_pg.py +++ b/tests/query_runner/test_pg.py @@ -25,3 +25,19 @@ def test_handles_dups_between_public_and_other_schemas(self): self.assertListEqual(schema["main.users"]["columns"], ["id", "name"]) self.assertIn('public."main.users"', schema.keys()) self.assertListEqual(schema['public."main.users"']["columns"], ["id"]) + + def test_build_schema_with_data_types(self): + results = { + "rows": [ + {"table_schema": "main", "table_name": "users", "column_name": "id", "data_type": "integer"}, + {"table_schema": "main", "table_name": "users", "column_name": "name", "data_type": "varchar"}, + ] + } + + schema = {} + + build_schema(results, schema) + + self.assertListEqual( + schema["main.users"]["columns"], [{"name": "id", "type": "integer"}, {"name": "name", "type": "varchar"}] + ) From 
b1fe2d4162c0f148bd43ff6c96423b5f7604d98f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 6 Aug 2024 10:05:21 +1000 Subject: [PATCH 2/3] Bump sentry-sdk from 1.28.1 to 2.8.0 (#7069) The Dependabot alert for sentry-sdk says that the security fix has been backported to the 1.x series as well, in version 1.45.1. So, let's use that as it should be more compatible than jumping to a new major series version. --- poetry.lock | 13 +++++++++---- pyproject.toml | 2 +- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 4847911832..43eea9596b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4282,13 +4282,13 @@ files = [ [[package]] name = "sentry-sdk" -version = "1.28.1" +version = "1.45.1" description = "Python client for Sentry (https://sentry.io)" optional = false python-versions = "*" files = [ - {file = "sentry-sdk-1.28.1.tar.gz", hash = "sha256:dcd88c68aa64dae715311b5ede6502fd684f70d00a7cd4858118f0ba3153a3ae"}, - {file = "sentry_sdk-1.28.1-py2.py3-none-any.whl", hash = "sha256:6bdb25bd9092478d3a817cb0d01fa99e296aea34d404eac3ca0037faa5c2aa0a"}, + {file = "sentry_sdk-1.45.1-py2.py3-none-any.whl", hash = "sha256:608887855ccfe39032bfd03936e3a1c4f4fc99b3a4ac49ced54a4220de61c9c1"}, + {file = "sentry_sdk-1.45.1.tar.gz", hash = "sha256:a16c997c0f4e3df63c0fc5e4207ccb1ab37900433e0f72fef88315d317829a26"}, ] [package.dependencies] @@ -4298,10 +4298,13 @@ urllib3 = {version = ">=1.26.11", markers = "python_version >= \"3.6\""} [package.extras] aiohttp = ["aiohttp (>=3.5)"] arq = ["arq (>=0.23)"] +asyncpg = ["asyncpg (>=0.23)"] beam = ["apache-beam (>=2.12)"] bottle = ["bottle (>=0.12.13)"] celery = ["celery (>=3)"] +celery-redbeat = ["celery-redbeat (>=2)"] chalice = ["chalice (>=1.16.0)"] +clickhouse-driver = ["clickhouse-driver (>=0.2.0)"] django = ["django (>=1.8)"] falcon = ["falcon (>=1.4)"] fastapi = ["fastapi (>=0.79.0)"] @@ -4310,7 +4313,9 @@ grpcio = ["grpcio (>=1.21.1)"] 
httpx = ["httpx (>=0.16.0)"] huey = ["huey (>=2)"] loguru = ["loguru (>=0.5)"] +openai = ["openai (>=1.0.0)", "tiktoken (>=0.3.0)"] opentelemetry = ["opentelemetry-distro (>=0.35b0)"] +opentelemetry-experimental = ["opentelemetry-distro (>=0.40b0,<1.0)", "opentelemetry-instrumentation-aiohttp-client (>=0.40b0,<1.0)", "opentelemetry-instrumentation-django (>=0.40b0,<1.0)", "opentelemetry-instrumentation-fastapi (>=0.40b0,<1.0)", "opentelemetry-instrumentation-flask (>=0.40b0,<1.0)", "opentelemetry-instrumentation-requests (>=0.40b0,<1.0)", "opentelemetry-instrumentation-sqlite3 (>=0.40b0,<1.0)", "opentelemetry-instrumentation-urllib (>=0.40b0,<1.0)"] pure-eval = ["asttokens", "executing", "pure-eval"] pymongo = ["pymongo (>=3.1)"] pyspark = ["pyspark (>=2.4.4)"] @@ -5330,4 +5335,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.11" -content-hash = "2f392e4b1cf2dd6c455462028ce8347e698a13a1b26ebe8449d71800bb925f25" +content-hash = "2c69b95239066cb9f899b20bdb61adc1f3dd8591414d955ea134ac049b962e01" diff --git a/pyproject.toml b/pyproject.toml index 898e7dd367..169ba7c4c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ restrictedpython = "6.2" rq = "1.16.1" rq-scheduler = "0.13.1" semver = "2.8.1" -sentry-sdk = "1.28.1" +sentry-sdk = "1.45.1" sqlalchemy = "1.3.24" sqlalchemy-searchable = "1.2.0" sqlalchemy-utils = "0.38.3" From 285c2b6e567f1b7bb26dc55c6c7e528bd801c6dc Mon Sep 17 00:00:00 2001 From: Zach Liu Date: Tue, 6 Aug 2024 23:36:58 -0400 Subject: [PATCH 3/3] Add data type to athena query runner (#7112) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- redash/query_runner/athena.py | 22 ++++++++++++++++------ tests/query_runner/test_athena.py | 21 +++++++++++++++------ 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/redash/query_runner/athena.py b/redash/query_runner/athena.py index 0d70a2a0e3..8ff9b3d1af 
100644 --- a/redash/query_runner/athena.py +++ b/redash/query_runner/athena.py @@ -199,10 +199,20 @@ def __get_schema_from_glue(self, catalog_id=""): logger.warning("Glue table doesn't have StorageDescriptor: %s", table_name) continue if table_name not in schema: - column = [columns["Name"] for columns in table["StorageDescriptor"]["Columns"]] - schema[table_name] = {"name": table_name, "columns": column} - for partition in table.get("PartitionKeys", []): - schema[table_name]["columns"].append(partition["Name"]) + schema[table_name] = {"name": table_name, "columns": []} + + for column_data in table["StorageDescriptor"]["Columns"]: + column = { + "name": column_data["Name"], + "type": column_data["Type"] if "Type" in column_data else None, + } + schema[table_name]["columns"].append(column) + for partition in table.get("PartitionKeys", []): + partition_column = { + "name": partition["Name"], + "type": partition["Type"] if "Type" in partition else None, + } + schema[table_name]["columns"].append(partition_column) return list(schema.values()) def get_schema(self, get_stats=False): @@ -212,7 +222,7 @@ def get_schema(self, get_stats=False): schema = {} query = """ - SELECT table_schema, table_name, column_name + SELECT table_schema, table_name, column_name, data_type FROM information_schema.columns WHERE table_schema NOT IN ('information_schema') """ @@ -225,7 +235,7 @@ def get_schema(self, get_stats=False): table_name = "{0}.{1}".format(row["table_schema"], row["table_name"]) if table_name not in schema: schema[table_name] = {"name": table_name, "columns": []} - schema[table_name]["columns"].append(row["column_name"]) + schema[table_name]["columns"].append({"name": row["column_name"], "type": row["data_type"]}) return list(schema.values()) diff --git a/tests/query_runner/test_athena.py b/tests/query_runner/test_athena.py index 6cda21c03b..6027c8c8a2 100644 --- a/tests/query_runner/test_athena.py +++ b/tests/query_runner/test_athena.py @@ -75,7 +75,9 @@ def 
test_external_table(self): {"DatabaseName": "test1"}, ) with self.stubber: - assert query_runner.get_schema() == [{"columns": ["row_id"], "name": "test1.jdbc_table"}] + assert query_runner.get_schema() == [ + {"columns": [{"name": "row_id", "type": "int"}], "name": "test1.jdbc_table"} + ] def test_partitioned_table(self): """ @@ -124,7 +126,12 @@ def test_partitioned_table(self): {"DatabaseName": "test1"}, ) with self.stubber: - assert query_runner.get_schema() == [{"columns": ["sk", "category"], "name": "test1.partitioned_table"}] + assert query_runner.get_schema() == [ + { + "columns": [{"name": "sk", "type": "int"}, {"name": "category", "type": "int"}], + "name": "test1.partitioned_table", + } + ] def test_view(self): query_runner = Athena({"glue": True, "region": "mars-east-1"}) @@ -156,7 +163,7 @@ def test_view(self): {"DatabaseName": "test1"}, ) with self.stubber: - assert query_runner.get_schema() == [{"columns": ["sk"], "name": "test1.view"}] + assert query_runner.get_schema() == [{"columns": [{"name": "sk", "type": "int"}], "name": "test1.view"}] def test_dodgy_table_does_not_break_schema_listing(self): """ @@ -196,7 +203,9 @@ def test_dodgy_table_does_not_break_schema_listing(self): {"DatabaseName": "test1"}, ) with self.stubber: - assert query_runner.get_schema() == [{"columns": ["region"], "name": "test1.csv"}] + assert query_runner.get_schema() == [ + {"columns": [{"name": "region", "type": "string"}], "name": "test1.csv"} + ] def test_no_storage_descriptor_table(self): """ @@ -312,6 +321,6 @@ def test_multi_catalog_tables(self): ) with self.stubber: assert query_runner.get_schema() == [ - {"columns": ["row_id"], "name": "test1.jdbc_table"}, - {"columns": ["row_id"], "name": "test2.jdbc_table"}, + {"columns": [{"name": "row_id", "type": "int"}], "name": "test1.jdbc_table"}, + {"columns": [{"name": "row_id", "type": "int"}], "name": "test2.jdbc_table"}, ]