From 03036dba1e33f06078feeb01f9b0008790160f0d Mon Sep 17 00:00:00 2001 From: Simo Tumelius Date: Tue, 17 May 2022 18:46:35 +0300 Subject: [PATCH] Improve profiler performance in Snowflake by reducing table scans using a single CTE from which column profiles are calculated (#48) Co-authored-by: Simo Tumelius --- .gitignore | 1 + macros/get_profile.sql | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 4811d0b..8f3a9dc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ target/ dbt_modules/ +dbt_packages/ logs/ venv/ diff --git a/macros/get_profile.sql b/macros/get_profile.sql index 2b31fcc..ef46a3b 100644 --- a/macros/get_profile.sql +++ b/macros/get_profile.sql @@ -50,7 +50,13 @@ {{ log("Column data types: " ~ data_type_map, info=False) }} {% set profile_sql %} - with column_profiles as ( + with source_data as ( + select + * + from {{ relation }} + ), + + column_profiles as ( {% for column_name in profile_column_names %} {% set data_type = data_type_map.get(column_name.lower(), "") %} select @@ -88,7 +94,7 @@ {%- endif %} cast(current_timestamp as {{ dbt_profiler.type_string() }}) as profiled_at, {{ loop.index }} as _column_position - from {{ relation }} + from source_data {% if not loop.last %}union all{% endif %} {% endfor %}