From 83bb2a554f5bcf8d751b4b648a10386abfc8eaf9 Mon Sep 17 00:00:00 2001 From: Dustin Weaver Date: Tue, 15 Nov 2022 10:47:47 -0500 Subject: [PATCH] Add `where_clause` argument to macros (#57) * Fixes #56 * Added unit tests * Updates per review comments --- README.md | 7 ++++- .../models/profile_where_clause.sql | 8 ++++++ .../models/profile_where_clause.yml | 9 ++++++ macros/get_profile.sql | 28 ++++++++++++------- macros/get_profile_table.sql | 4 +-- macros/print_profile.sql | 4 +-- macros/print_profile_docs.sql | 4 +-- macros/print_profile_schema.sql | 4 +-- 8 files changed, 49 insertions(+), 19 deletions(-) create mode 100644 integration_tests/models/profile_where_clause.sql create mode 100644 integration_tests/models/profile_where_clause.yml diff --git a/README.md b/README.md index 399c47c..ce888d9 100644 --- a/README.md +++ b/README.md @@ -124,13 +124,14 @@ This macro returns a relation profile as a SQL query that can be used in a dbt m * `exclude_measures` (optional): List of measures to exclude from the profile (default: `[]`) * `include_columns` (optional): List of columns to include in the profile (default: `[]` i.e., all). Only one of `include_columns` and `exclude_columns` can be specified at a time. * `exclude_columns` (optional): List of columns to exclude from the profile (default: `[]`). Only one of `include_columns` and `exclude_columns` can be specified at a time. +* `where_clause` (optional): SQL `WHERE` clause to allow exclusion of records from profiler. 
### Usage Use this macro in a dbt model, using a [ref()](https://docs.getdbt.com/reference/dbt-jinja-functions/ref): ```sql -{{ dbt_profiler.get_profile(relation=ref("customers")) }} +{{ dbt_profiler.get_profile(relation=ref("customers"), where_clause="is_active = true") }} ``` Use this macro in a dbt model, using a [source()](https://docs.getdbt.com/reference/dbt-jinja-functions/source): @@ -160,6 +161,7 @@ This macro returns a relation profile as an [agate.Table](https://agate.readthed * `exclude_measures` (optional): List of measures to exclude from the profile (default: `[]`) * `include_columns` (optional): List of columns to include in the profile (default: `[]` i.e., all). Only one of `include_columns` and `exclude_columns` can be specified at a time. * `exclude_columns` (optional): List of columns to exclude from the profile (default: `[]`). Only one of `include_columns` and `exclude_columns` can be specified at a time. +* `where_clause` (optional): SQL where clause to allow exclusion of records from profiler. This is done after the `WHERE` keyword. ### Usage @@ -187,6 +189,7 @@ This macro prints a relation profile as a Markdown table to `stdout`. * `max_columns` (optional): The maximum number of columns to display before truncating the data (default: `7`) * `max_column_width` (optional): Truncate all columns to at most this width (default: `30`) * `max_precision` (optional): Puts a limit on the maximum precision displayed for number types (default: `none` i.e., not limited) +* `where_clause` (optional): SQL where clause to allow exclusion of records from profiler. This is done after the `WHERE` keyword. ### Usage Call the macro as an [operation](https://docs.getdbt.com/docs/using-operations): @@ -228,6 +231,7 @@ This macro prints a relation schema YAML to `stdout` containing all columns and * `exclude_columns` (optional): List of columns to exclude from the profile (default: `[]`). 
Only one of `include_columns` and `exclude_columns` can be specified at a time. * `model_description` (optional): Model description included in the schema (default: `""`) * `column_description` (optional): Column descriptions included in the schema (default: `""`) +* `where_clause` (optional): SQL where clause to allow exclusion of records from profiler. This is done after the `WHERE` keyword. ### Usage Call the macro as an [operation](https://docs.getdbt.com/docs/using-operations): @@ -345,6 +349,7 @@ This macro prints a relation profile as a Markdown table wrapped in a Jinja `doc * `max_columns` (optional): The maximum number of columns to display before truncating the data (default: `7`) * `max_column_width` (optional): Truncate all columns to at most this width (default: `30`) * `max_precision` (optional): Puts a limit on the maximum precision displayed for number types (default: `none` i.e., not limited) +* `where_clause` (optional): SQL where clause to allow exclusion of records from profiler. This is done after the `WHERE` keyword. 
### Usage diff --git a/integration_tests/models/profile_where_clause.sql b/integration_tests/models/profile_where_clause.sql new file mode 100644 index 0000000..67bfc5b --- /dev/null +++ b/integration_tests/models/profile_where_clause.sql @@ -0,0 +1,8 @@ +-- depends_on: {{ ref("test_data") }} +{% if execute %} + {%- set where_clause = "string_not_nullable = 'one'" -%} + {%- if target.type == "snowflake" -%} + {%- set where_clause = where_clause -%} + {%- endif -%} + {{ dbt_profiler.get_profile(relation=ref("test_data"), where_clause=where_clause) }} +{% endif %} \ No newline at end of file diff --git a/integration_tests/models/profile_where_clause.yml b/integration_tests/models/profile_where_clause.yml new file mode 100644 index 0000000..e704aad --- /dev/null +++ b/integration_tests/models/profile_where_clause.yml @@ -0,0 +1,9 @@ +version: 2 + +models: + - name: profile_where_clause + columns: + - name: string_not_nullable + tests: + - dbt_expectations.expect_column_values_to_match_like_pattern: + like_pattern: "one" \ No newline at end of file diff --git a/macros/get_profile.sql b/macros/get_profile.sql index ae2ae75..2efdfdb 100644 --- a/macros/get_profile.sql +++ b/macros/get_profile.sql @@ -1,10 +1,10 @@ -{% macro get_profile(relation, exclude_measures=[], include_columns=[], exclude_columns=[]) %} - {{ return(adapter.dispatch("get_profile", macro_namespace="dbt_profiler")(relation, exclude_measures, include_columns, exclude_columns)) }} +{% macro get_profile(relation, exclude_measures=[], include_columns=[], exclude_columns=[], where_clause=none) %} + {{ return(adapter.dispatch("get_profile", macro_namespace="dbt_profiler")(relation, exclude_measures, include_columns, exclude_columns, where_clause)) }} {% endmacro %} -{% macro default__get_profile(relation, exclude_measures=[], include_columns=[], exclude_columns=[]) %} +{% macro default__get_profile(relation, exclude_measures=[], include_columns=[], exclude_columns=[], where_clause=none) %} {%- if 
include_columns and exclude_columns -%} {{ exceptions.raise_compiler_error("Both include_columns and exclude_columns arguments were provided to the `get_profile` macro. Only one is allowed.") }} @@ -36,7 +36,7 @@ {{ log("Relation columns: " ~ relation_column_names | join(', '), info=False) }} {%- if include_columns -%} - {%- set profile_column_names = relation_column_names | select("in", include_columns) | list-%} + {%- set profile_column_names = relation_column_names | select("in", include_columns) | list -%} {%- elif exclude_columns -%} {%- set profile_column_names = relation_column_names | reject("in", exclude_columns) | list -%} {%- else -%} @@ -101,6 +101,9 @@ cast(current_timestamp as {{ dbt_profiler.type_string() }}) as profiled_at, {{ loop.index }} as _column_position from source_data + {% if where_clause %} + where {{ where_clause }} + {% endif %} {% if not loop.last %}union all{% endif %} {% endfor %} @@ -124,7 +127,7 @@ -{% macro databricks__get_profile(relation, exclude_measures=[], include_columns=[], exclude_columns=[]) %} +{% macro databricks__get_profile(relation, exclude_measures=[], include_columns=[], exclude_columns=[], where_clause=none) %} {%- if include_columns and exclude_columns -%} {{ exceptions.raise_compiler_error("Both include_columns and exclude_columns arguments were provided to the `get_profile` macro. 
Only one is allowed.") }} @@ -156,7 +159,7 @@ {{ log("Relation columns: " ~ relation_column_names | join(', '), info=False) }} {%- if include_columns -%} - {%- set profile_column_names = relation_column_names | select("in", include_columns) | list-%} + {%- set profile_column_names = relation_column_names | select("in", include_columns) | list -%} {%- elif exclude_columns -%} {%- set profile_column_names = relation_column_names | reject("in", exclude_columns) | list -%} {%- else -%} @@ -226,7 +229,9 @@ cast(current_timestamp as {{ dbt_profiler.type_string() }}) as profiled_at, {{ loop.index }} as _column_position from source_data - + {% if where_clause %} + where {{ where_clause }} + {% endif %} {% if not loop.last %}union all{% endif %} {% endfor %} ) @@ -251,7 +256,7 @@ -{% macro sqlserver__get_profile(relation, exclude_measures=[], include_columns=[], exclude_columns=[]) %} +{% macro sqlserver__get_profile(relation, exclude_measures=[], include_columns=[], exclude_columns=[], where_clause=none) %} {%- if include_columns and exclude_columns -%} {{ exceptions.raise_compiler_error("Both include_columns and exclude_columns arguments were provided to the `get_profile` macro. 
Only one is allowed.") }} @@ -283,7 +288,7 @@ {{ log("Relation columns: " ~ relation_column_names | join(', '), info=False) }} {%- if include_columns -%} - {%- set profile_column_names = relation_column_names | select("in", include_columns) | list-%} + {%- set profile_column_names = relation_column_names | select("in", include_columns) | list -%} {%- elif exclude_columns -%} {%- set profile_column_names = relation_column_names | reject("in", exclude_columns) | list -%} {%- else -%} @@ -348,6 +353,9 @@ cast(current_timestamp as {{ dbt_profiler.type_string() }}) as profiled_at, {{ loop.index }} as _column_position from source_data + {% if where_clause %} + where {{ where_clause }} + {% endif %} {% if not loop.last %}union all{% endif %} {% endfor %} @@ -367,4 +375,4 @@ {% do return(profile_sql) %} {% endif %} -{% endmacro %} \ No newline at end of file +{% endmacro %} diff --git a/macros/get_profile_table.sql b/macros/get_profile_table.sql index 70e58f4..d6b83ea 100644 --- a/macros/get_profile_table.sql +++ b/macros/get_profile_table.sql @@ -1,4 +1,4 @@ -{% macro get_profile_table(relation=none, relation_name=none, schema=none, database=none, exclude_measures=[], include_columns=[], exclude_columns=[]) %} +{% macro get_profile_table(relation=none, relation_name=none, schema=none, database=none, exclude_measures=[], include_columns=[], exclude_columns=[], where_clause=none) %} {%- set relation = dbt_profiler.get_relation( relation=relation, @@ -6,7 +6,7 @@ schema=schema, database=database ) -%} -{%- set profile_sql = dbt_profiler.get_profile(relation=relation, exclude_measures=exclude_measures, include_columns=include_columns, exclude_columns=exclude_columns) -%} +{%- set profile_sql = dbt_profiler.get_profile(relation=relation, exclude_measures=exclude_measures, include_columns=include_columns, exclude_columns=exclude_columns, where_clause=where_clause) -%} {{ log(profile_sql, info=False) }} {% set results = run_query(profile_sql) %} {% set results = 
results.rename(results.column_names | map('lower')) %} diff --git a/macros/print_profile.sql b/macros/print_profile.sql index 1a74b34..9580c37 100644 --- a/macros/print_profile.sql +++ b/macros/print_profile.sql @@ -1,6 +1,6 @@ -{% macro print_profile(relation=none, relation_name=none, schema=none, database=none, exclude_measures=[], include_columns=[], exclude_columns=[], max_rows=none, max_columns=13, max_column_width=30, max_precision=none) %} +{% macro print_profile(relation=none, relation_name=none, schema=none, database=none, exclude_measures=[], include_columns=[], exclude_columns=[], max_rows=none, max_columns=13, max_column_width=30, max_precision=none, where_clause=none) %} -{%- set results = dbt_profiler.get_profile_table(relation=relation, relation_name=relation_name, schema=schema, database=database, exclude_measures=exclude_measures, include_columns=include_columns, exclude_columns=exclude_columns) -%} +{%- set results = dbt_profiler.get_profile_table(relation=relation, relation_name=relation_name, schema=schema, database=database, exclude_measures=exclude_measures, include_columns=include_columns, exclude_columns=exclude_columns, where_clause=where_clause) -%} {% if execute %} {% do results.print_table(max_rows=max_rows, max_columns=max_columns, max_column_width=max_column_width, max_precision=max_precision) %} diff --git a/macros/print_profile_docs.sql b/macros/print_profile_docs.sql index 49136ef..60acb2f 100644 --- a/macros/print_profile_docs.sql +++ b/macros/print_profile_docs.sql @@ -1,6 +1,6 @@ -{% macro print_profile_docs(relation=none, relation_name=none, docs_name=none, schema=none, database=none, exclude_measures=[], include_columns=[], exclude_columns=[], max_rows=none, max_columns=13, max_column_width=30, max_precision=none) %} +{% macro print_profile_docs(relation=none, relation_name=none, docs_name=none, schema=none, database=none, exclude_measures=[], include_columns=[], exclude_columns=[], max_rows=none, max_columns=13, 
max_column_width=30, max_precision=none, where_clause=none) %} -{%- set results = dbt_profiler.get_profile_table(relation=relation, relation_name=relation_name, schema=schema, database=database, exclude_measures=exclude_measures, include_columns=include_columns, exclude_columns=exclude_columns) -%} +{%- set results = dbt_profiler.get_profile_table(relation=relation, relation_name=relation_name, schema=schema, database=database, exclude_measures=exclude_measures, include_columns=include_columns, exclude_columns=exclude_columns, where_clause=where_clause) -%} {% if docs_name is none %} {% set docs_name = 'dbt_profiler__' + relation_name %} diff --git a/macros/print_profile_schema.sql b/macros/print_profile_schema.sql index 5bc658c..a63d268 100644 --- a/macros/print_profile_schema.sql +++ b/macros/print_profile_schema.sql @@ -1,7 +1,7 @@ -{% macro print_profile_schema(relation=none, relation_name=none, schema=none, database=none, exclude_measures=[], include_columns=[], exclude_columns=[], model_description="", column_description="") %} +{% macro print_profile_schema(relation=none, relation_name=none, schema=none, database=none, exclude_measures=[], include_columns=[], exclude_columns=[], model_description="", column_description="", where_clause=none) %} {%- set column_dicts = [] -%} -{%- set results = dbt_profiler.get_profile_table(relation=relation, relation_name=relation_name, schema=schema, database=database, exclude_measures=exclude_measures, include_columns=include_columns, exclude_columns=exclude_columns) -%} +{%- set results = dbt_profiler.get_profile_table(relation=relation, relation_name=relation_name, schema=schema, database=database, exclude_measures=exclude_measures, include_columns=include_columns, exclude_columns=exclude_columns, where_clause=where_clause) -%} {% if execute %} {% for row in results.rows %}