Skip to content

Commit

Permalink
Fix median calculation in Redshift (#89)
Browse files Browse the repository at this point in the history
* Fix. Issue 84. Median function fixed for Redshift

* Fix. all median calculations need to include the 3rd parameter
  • Loading branch information
odikia authored Jun 10, 2024
1 parent 4a76288 commit 39f3e48
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 9 deletions.
6 changes: 3 additions & 3 deletions macros/get_profile.sql
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@
{{ dbt_profiler.measure_avg(column_name, data_type) }} as avg,
{%- endif %}
{% if "median" not in exclude_measures -%}
{{ dbt_profiler.measure_median(column_name, data_type) }} as median,
({{ dbt_profiler.measure_median(column_name, data_type, 'source_data') }}) as median,
{%- endif %}
{% if "std_dev_population" not in exclude_measures -%}
{{ dbt_profiler.measure_std_dev_population(column_name, data_type) }} as std_dev_population,
Expand Down Expand Up @@ -240,7 +240,7 @@
{{ dbt_profiler.measure_avg(column_name, data_type) }} as avg,
{%- endif %}
{% if "median" not in exclude_measures -%}
{{ dbt_profiler.measure_median(column_name, data_type) }} as median,
({{ dbt_profiler.measure_median(column_name, data_type, 'source_data') }}) as median,
{%- endif %}
{% if "std_dev_population" not in exclude_measures -%}
{{ dbt_profiler.measure_std_dev_population(column_name, data_type) }} as std_dev_population,
Expand Down Expand Up @@ -377,7 +377,7 @@
{{ dbt_profiler.measure_avg(column_name, data_type) }} as avg,
{%- endif %}
{% if "median" not in exclude_measures -%}
{{ dbt_profiler.measure_median(column_name, data_type) }} as median,
({{ dbt_profiler.measure_median(column_name, data_type, 'source_data') }}) as median,
{%- endif %}
{% if "std_dev_population" not in exclude_measures -%}
{{ dbt_profiler.measure_std_dev_population(column_name, data_type) }} as std_dev_population,
Expand Down
22 changes: 16 additions & 6 deletions macros/measures.sql
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,11 @@ case when count(distinct {{ adapter.quote(column_name) }}) = count(*) then 1 els

{# measure_median ------------------------------------------------- #}

{%- macro measure_median(column_name, data_type) -%}
{{ return(adapter.dispatch("measure_median", macro_namespace="dbt_profiler")(column_name, data_type)) }}
{%- macro measure_median(column_name, data_type, cte_name) -%}
{{ return(adapter.dispatch("measure_median", macro_namespace="dbt_profiler")(column_name, data_type, cte_name)) }}
{%- endmacro -%}

{%- macro default__measure_median(column_name, data_type) -%}
{%- macro default__measure_median(column_name, data_type, cte_name) -%}

{%- if dbt_profiler.is_numeric_dtype(data_type) and not dbt_profiler.is_struct_dtype(data_type) -%}
median({{ adapter.quote(column_name) }})
Expand All @@ -143,7 +143,7 @@ case when count(distinct {{ adapter.quote(column_name) }}) = count(*) then 1 els

{%- endmacro -%}

{%- macro bigquery__measure_median(column_name, data_type) -%}
{%- macro bigquery__measure_median(column_name, data_type, cte_name) -%}

{%- if dbt_profiler.is_numeric_dtype(data_type) and not dbt_profiler.is_struct_dtype(data_type) -%}
APPROX_QUANTILES({{ adapter.quote(column_name) }}, 100)[OFFSET(50)]
Expand All @@ -153,7 +153,7 @@ case when count(distinct {{ adapter.quote(column_name) }}) = count(*) then 1 els

{%- endmacro -%}

{%- macro postgres__measure_median(column_name, data_type) -%}
{%- macro postgres__measure_median(column_name, data_type, cte_name) -%}

{%- if dbt_profiler.is_numeric_dtype(data_type) and not dbt_profiler.is_struct_dtype(data_type) -%}
percentile_cont(0.5) within group (order by {{ adapter.quote(column_name) }})
Expand All @@ -163,7 +163,17 @@ case when count(distinct {{ adapter.quote(column_name) }}) = count(*) then 1 els

{%- endmacro -%}

{%- macro sql_server__measure_median(column_name, data_type) -%}
{%- macro redshift__measure_median(column_name, data_type, cte_name) -%}

{%- if dbt_profiler.is_numeric_dtype(data_type) and not dbt_profiler.is_struct_dtype(data_type) -%}
select percentile_cont(0.5) within group (order by {{ adapter.quote(column_name) }}) from {{ cte_name }}
{%- else -%}
cast(null as {{ dbt.type_numeric() }})
{%- endif -%}

{%- endmacro -%}

{%- macro sql_server__measure_median(column_name, data_type, cte_name) -%}

{%- if dbt_profiler.is_numeric_dtype(data_type) and not dbt_profiler.is_struct_dtype(data_type) -%}
percentile_cont({{ adapter.quote(column_name) }}, 0.5) over ()
Expand Down

0 comments on commit 39f3e48

Please sign in to comment.