Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resolve "get_column_values returns an error if passed a ref, but the model has not been previously built" #152

Merged
merged 2 commits into from
Jul 24, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -216,11 +216,12 @@ models:
### SQL helpers
#### get_column_values ([source](macros/sql/get_column_values.sql))
This macro returns the unique values for a column in a given table.
It takes an optional `default` argument, which is returned when compiling if the relation does not already exist.

Usage:
```
-- Returns a list of the top 50 states in the `users` table
{% set states = dbt_utils.get_column_values(table=ref('users'), column='state', max_records=50) %}
{% set states = dbt_utils.get_column_values(table=ref('users'), column='state', max_records=50, default=[]) %}

{% for state in states %}
...
Expand Down
4 changes: 4 additions & 0 deletions integration_tests/Makefile
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@

test-postgres:
dbt compile --target postgres
dbt seed --target postgres --full-refresh
dbt run --target postgres --full-refresh --exclude test_insert_by_period
dbt test --target postgres --exclude test_insert_by_period

test-redshift:
dbt compile --target redshift
dbt seed --target redshift --full-refresh
dbt run --target redshift --full-refresh
dbt test --target redshift

test-snowflake:
dbt compile --target snowflake
dbt seed --target snowflake --full-refresh
dbt run --target snowflake --full-refresh
dbt test --target snowflake

test-bigquery:
dbt compile --target bigquery
dbt seed --target bigquery --full-refresh
dbt run --target bigquery --full-refresh
dbt test --target bigquery
Expand Down
4 changes: 1 addition & 3 deletions integration_tests/models/sql/test_get_column_values.sql
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@

{% set columns = dbt_utils.get_column_values(ref('data_get_column_values'), 'field') %}
{% set columns = dbt_utils.get_column_values(ref('data_get_column_values'), 'field', default = []) %}


{% if target.type == 'snowflake' %}

select
{% set columns = columns if columns is iterable else [] %}
{% for column in columns -%}

sum(case when field = '{{ column }}' then 1 else 0 end) as count_{{ column }}
Expand All @@ -18,7 +17,6 @@ from {{ ref('data_get_column_values') }}
{% else %}

select
{% set columns = columns if columns is iterable else [] %}
{% for column in columns -%}

{{dbt_utils.safe_cast("sum(case when field = '" ~ column ~ "' then 1 else 0 end)", dbt_utils.type_string()) }} as count_{{ column }}
Expand Down
45 changes: 34 additions & 11 deletions macros/sql/get_column_values.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

{#
This macro fetches the unique values for `column` in the table `table`

Expand All @@ -11,19 +10,43 @@ Returns:
A list of distinct values for the specified columns
#}

{% macro get_column_values(table, column, max_records=none) -%}
{% macro get_column_values(table, column, max_records=none, default=none) -%}

{#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #}
{%- if not execute -%}
{{ return('') }}
{% endif %}
{#-- #}

{%- set target_relation = adapter.get_relation(database=table.database,
schema=table.schema,
identifier=table.identifier) -%}

{%- call statement('get_column_values', fetch_result=true) %}

{%- if not target_relation and default is none -%}

{{ exceptions.raise_compiler_error("In get_column_values(): relation " ~ table ~ " does not exist and no default value was provided.") }}

{%- elif not target_relation and default is not none -%}

{{ log("Relation " ~ table ~ " does not exist. Returning the default value: " ~ default) }}

{{ return(default) }}

{%- else -%}

{%- call statement('get_column_values', fetch_result=True) %}
select
{{ column }} as value

select
{{ column }} as value
from {{ target_relation }}
group by 1
order by count(*) desc

from {{ table }}
group by 1
order by count(*) desc
{% if max_records is not none %}
limit {{ max_records }}
{% endif %}

{% if max_records is not none %}
limit {{ max_records }}
{% endif %}

{%- endcall -%}
Expand All @@ -34,7 +57,7 @@ Returns:
{%- set values = value_list['data'] | map(attribute=0) | list %}
{{ return(values) }}
{%- else -%}
{{ return([]) }}
{{ return(default) }}
{%- endif -%}

{%- endmacro %}