Snowplow Fractribution Models
This page is auto-generated from our dbt packages, some information may be incomplete
Snowplow Fractributionโ
Snowplow Fractribution Call Snowpark Macrosโ
models/snowplow_fractribution_call_snowpark_macros.sql
Description
Model to trigger the Snowpark macros for producing the 3 attribution model output tables
Type: Table
Details
Columns
Column Name | Description | Type |
---|---|---|
schema_name | text | |
table_name | text | |
last_run_time | text |
Code
- default
--This model is only used when the attribution package is run on Snowflake and the Python script will be run using Snowpark, rather than manually.
{{ config(
enabled = target.type == "snowflake" and var('snowplow__run_python_script_in_snowpark', false),
materialized = 'table',
)}}
-- depends_on: {{ref('snowplow_fractribution_path_summary')}}
-- depends_on: {{ref('snowplow_fractribution_channel_spend')}}
{{create_report_table_proc()}}
{{run_stored_procedure(var('snowplow__attribution_model_for_snowpark'), var('snowplow__conversion_window_start_date') , var('snowplow__conversion_window_end_date') )}}
With table_1 as (
SELECT
'{{schema}}' as schema_name,
'snowplow_fractribution_path_summary_with_channels' as table_name,
'{{dbt_utils.pretty_time(format="%Y-%m-%d %H:%M:%S")}}' as last_run_time
),
table_2 as (
SELECT
'{{schema}}' as schema_name,
'snowplow_fractribution_channel_attribution' as table_name,
'{{dbt_utils.pretty_time(format="%Y-%m-%d %H:%M:%S")}}' as last_run_time
),
table_3 as (
SELECT
'{{schema}}' as schema_name,
'snowplow_fractribution_report_table' as table_name,
'{{dbt_utils.pretty_time(format="%Y-%m-%d %H:%M:%S")}}' as last_run_time
)
SELECT * FROM table_1
UNION ALL
SELECT * FROM table_2
UNION ALL
SELECT * FROM table_3
{%- if execute %}
{{ log('Finished running stored procedure. Created tables:
snowplow_fractribution_path_summary_with_channels
snowplow_fractribution_channel_attribution
snowplow_fractribution_report_table', info=True)}}
{% endif %}
Depends On
Snowplow Fractribution Channel Countsโ
models/snowplow_fractribution_channel_counts.sql
Description
Number of sessions per channel, campaign, source and medium
Type: Table
Details
Columns
Column Name | Description | Type |
---|---|---|
channel | Channel name | text |
campaign | Campaign name | text |
source | Source / referring host | text |
medium | Marketing medium | text |
number_of_sessions | Count of sessions per channel / campaign / source / medium | number |
Code
- default
{{
config(
sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt'))
)
}}
select
channel,
campaign,
source,
medium,
count(*) as number_of_sessions
from {{ ref('snowplow_fractribution_sessions_by_customer_id') }}
group by 1,2,3,4
order by channel, number_of_sessions desc
Depends On
- Models
- Macros
Referenced By
Snowplow Fractribution Channel Spendโ
models/snowplow_fractribution_channel_spend.sql
Description
This model does not currently have a description.
Type: Table
Details
Columns
Column Name | Description | Type |
---|---|---|
channel | text | |
spend | number |
Code
- default
{{
config(
sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt'))
)
}}
-- By default, the model assigns an example 10k spend to each channel found in channel_counts
-- TODO: put in your own spend calculations per channel in the channel_spend macro in your own dbt project
{{ channel_spend() }}
Depends On
- Models
- Macros
Referenced By
Snowplow Fractribution Conversions By Customer Idโ
models/snowplow_fractribution_conversions_by_customer_id.sql
Description
Each conversion and associated revenue per customer_id
Type: Table
Details
Columns
Column Name | Description | Type |
---|---|---|
customer_id | Identifier for the customer, 'f' prefixed when domain_userid is used, 'u' prefixed for when user_id is used (logged in?) | text |
conversion_tstamp | timestamp_ntz | |
revenue | Revenue (dollars / cents) for the conversion | number |
Code
- default
{{
config(
sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt'))
)
}}
select
case when events.user_id is not null and events.user_id != '' then 'u' || events.user_id -- use event user_id
{% if var('snowplow__use_snowplow_web_user_mapping_table') %}
when user_mapping.domain_userid is not null then 'u' || user_mapping.user_id
{% endif %}
else 'f' || events.domain_userid
end as customer_id,
derived_tstamp as conversion_tstamp,
{{ conversion_value() }} as revenue
from {{ var('snowplow__conversions_source' )}} as events
{% if var('snowplow__use_snowplow_web_user_mapping_table') %}
left join {{ var('snowplow__web_user_mapping_table') }} as user_mapping
on events.domain_userid = user_mapping.domain_userid
{% endif %}
where {{ conversion_clause() }}
and date(derived_tstamp) >= '{{ get_lookback_date_limits("min", "conversions") }}'
and date(derived_tstamp) <= '{{ get_lookback_date_limits("max", "conversions") }}'
{% if var('snowplow__conversions_source_filter') != '' %}
and date({{ var('snowplow__conversions_source_filter') }}) >= {{ dateadd('day',-var('snowplow__conversions_source_filter_buffer_days'), "'"~get_lookback_date_limits('min', 'conversions')~"'") }}
and date({{ var('snowplow__conversions_source_filter') }}) <= {{ dateadd('day', var('snowplow__conversions_source_filter_buffer_days'),"'"~get_lookback_date_limits('max', 'conversions')~"'") }}
{% endif %}
Depends On
- Macros
Referenced By
Snowplow Fractribution Path Summaryโ
models/snowplow_fractribution_path_summary.sql
Description
For each unique path, a summary of associated conversions, non conversions and revenue
Type: Table
Details
Columns
Column Name | Description | Type |
---|---|---|
transformed_path | > delimited path summary | text |
conversions | Count of conversions for this path | number |
non_conversions | Count of non-conversions for path | number |
revenue | Revenue for the given path | number |
Code
- default
{{
config(
sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt'))
)
}}
with paths_to_conversion as (
select
transformed_path,
count(*) as conversions,
sum(revenue) as revenue
from {{ ref('snowplow_fractribution_paths_to_conversion') }}
group by 1
)
, paths_to_non_conversion as (
select
transformed_path,
count(*) as non_conversions
from {{ ref('snowplow_fractribution_paths_to_non_conversion') }}
group by 1
)
select
coalesce(c.transformed_path, n.transformed_path) as transformed_path,
coalesce(c.conversions, 0) as conversions,
coalesce(n.non_conversions, 0) as non_conversions,
c.revenue
from paths_to_conversion c
full join paths_to_non_conversion n
on c.transformed_path = n.transformed_path
Depends On
- Models
- Macros
Referenced By
Snowplow Fractribution Paths To Conversionโ
models/snowplow_fractribution_paths_to_conversion.sql
Description
Customer id and the the paths the customer has followed that have lead to conversion
Type: Table
Details
Columns
Column Name | Description | Type |
---|---|---|
customer_id | Id for the customer (identified or cookie) | text |
conversion_tstamp | timestamp_ntz | |
revenue | Revenue associated with the conversion | number |
path | Path to conversion (> delimited) | text |
transformed_path | Transformations applied to "path" above | text |
Code
- default
{{
config(
sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt'))
)
}}
-- Requires macro trim_long_path
with string_aggs as (
select
c.customer_id,
c.conversion_tstamp,
c.revenue,
{{ snowplow_utils.get_string_agg('channel', 's', separator=' > ', sort_numeric=false, order_by_column='visit_start_tstamp', order_by_column_prefix='s') }} as path
from {{ ref('snowplow_fractribution_conversions_by_customer_id') }} c
inner join {{ ref('snowplow_fractribution_sessions_by_customer_id') }} s
on c.customer_id = s.customer_id
and {{ datediff('s.visit_start_tstamp', 'c.conversion_tstamp', 'day') }} >= 0
and {{ datediff('s.visit_start_tstamp', 'c.conversion_tstamp', 'day') }} <= {{ var('snowplow__path_lookback_days') }}
group by 1,2,3
)
{% if target.type not in ('redshift') %}
, arrays as (
select
customer_id,
conversion_tstamp,
revenue,
{{ snowplow_utils.get_split_to_array('path', 's', ' > ') }} as path,
{{ snowplow_utils.get_split_to_array('path', 's', ' > ') }} as transformed_path
from string_aggs s
)
{{ transform_paths('conversions', 'arrays') }}
select
customer_id,
conversion_tstamp,
revenue,
{{ snowplow_utils.get_array_to_string('path', 'p', ' > ') }} as path,
{{ snowplow_utils.get_array_to_string('transformed_path', 'p', ' > ') }} as transformed_path
from path_transforms p
{% else %}
, strings as (
select
customer_id,
conversion_tstamp,
revenue,
path as path,
path as transformed_path
from string_aggs s
)
{{ transform_paths('conversions', 'strings') }}
select *
from path_transforms p
{% endif %}
Depends On
- Models
- Macros
Referenced By
Snowplow Fractribution Paths To Non Conversionโ
models/snowplow_fractribution_paths_to_non_conversion.sql
Description
Customer id and the the paths the customer has followed that have not lead to conversion
Type: Table
Details
Columns
Column Name | Description | Type |
---|---|---|
customer_id | Id for the customer (identified or cookie) | text |
path | Path to conversion (> delimited) | text |
transformed_path | Transformations applied to "path" above | text |
Code
- default
{{
config(
sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt'))
)
}}
-- Requires macro trim_long_path
with non_conversions as (
select
customer_id,
max(visit_start_tstamp) as non_conversion_tstamp
from {{ ref('snowplow_fractribution_sessions_by_customer_id') }} s
where not exists (select customer_id from {{ ref('snowplow_fractribution_conversions_by_customer_id') }} c where s.customer_id = c.customer_id)
group by 1
)
, string_aggs as (
select
n.customer_id,
{{ snowplow_utils.get_string_agg('channel', 's', separator=' > ', order_by_column='visit_start_tstamp', sort_numeric=false, order_by_column_prefix='s') }} as path
from non_conversions n
inner join {{ ref('snowplow_fractribution_sessions_by_customer_id') }} s
on n.customer_id = s.customer_id
and {{ datediff('s.visit_start_tstamp', 'n.non_conversion_tstamp', 'day') }} >= 0
and {{ datediff('s.visit_start_tstamp', 'n.non_conversion_tstamp', 'day') }} <= {{ var('snowplow__path_lookback_days') }}
group by 1
)
{% if target.type not in ('redshift') %}
, arrays as (
select
customer_id,
{{ snowplow_utils.get_split_to_array('path', 's', ' > ') }} as path,
{{ snowplow_utils.get_split_to_array('path', 's', ' > ') }} as transformed_path
from string_aggs s
)
{{ transform_paths('non_conversions', 'arrays') }}
select
customer_id,
{{ snowplow_utils.get_array_to_string('path', 'p', ' > ') }} as path,
{{ snowplow_utils.get_array_to_string('transformed_path', 'p', ' > ') }} as transformed_path
from path_transforms p
{% else %}
, strings as (
select
customer_id,
path as path,
path as transformed_path
from string_aggs s
)
{{ transform_paths('non_conversions', 'strings') }}
select *
from path_transforms p
{% endif %}
Depends On
- Models
- Macros
Referenced By
Snowplow Fractribution Sessions By Customer Idโ
models/snowplow_fractribution_sessions_by_customer_id.sql
Description
Channels per session by customer id, yields one row per session unless snowplow__consider_intrasession_channels is true
Type: Table
Details
Columns
Column Name | Description | Type |
---|---|---|
customer_id | Customer id | text |
visit_start_tstamp | UTC timestamp for the start of the session | timestamp_ntz |
channel | Channel | text |
referral_path | Referall path for the session | text |
campaign | Marketing campaign | text |
source | Marketing source | text |
medium | Marketing medium | text |
Code
- default
{{
config(
sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt'))
)
}}
-- restrict to certain hostnames
{% if var('snowplow__conversion_hosts') in ('', [], '[]') or var('snowplow__conversion_hosts') == None %}
{{ exceptions.raise_compiler_error("Error: var('conversion_host') needs to be set!") }}
{% endif %}
with base_data as (
select
case when page_views.user_id is not null and page_views.user_id != '' then 'u' || page_views.user_id -- use event user_id
{% if var('snowplow__use_snowplow_web_user_mapping_table') %}
when user_mapping.domain_userid is not null then 'u' || user_mapping.user_id
{% endif %}
else 'f' || page_views.domain_userid
end as customer_id, -- f (anonymous) or u (identifier) prefixed user identifier
derived_tstamp as visit_start_tstamp, -- we consider the event timestamp to be the session start, rather than the session start timestamp
{{ channel_classification() }} as channel,
refr_urlpath as referral_path,
mkt_campaign as campaign,
mkt_source as source,
mkt_medium as medium
from {{ var('snowplow__page_views_source') }} page_views
{% if var('snowplow__use_snowplow_web_user_mapping_table') %}
left join {{ var('snowplow__web_user_mapping_table') }} as user_mapping
on page_views.domain_userid = user_mapping.domain_userid
{% endif %}
where date(derived_tstamp) >= '{{ get_lookback_date_limits("min", "sessions") }}'
and date(derived_tstamp) <= '{{ get_lookback_date_limits("max", "sessions") }}'
and
-- restrict to certain hostnames
{% if var('snowplow__conversion_hosts') in ('', [], '[]') or var('snowplow__conversion_hosts') == None %}
{{ exceptions.raise_compiler_error("Error: var('conversion_host') needs to be set!") }}
{% endif %}
page_urlhost in ({{ snowplow_utils.print_list(var('snowplow__conversion_hosts')) }})
{% if var('snowplow__consider_intrasession_channels') %}
-- yields one row per channel change
and mkt_medium is not null and mkt_medium != ''
{% else %}
-- yields one row per session (last touch)
and page_view_in_session_index = 1 -- takes the first page view in the session
{% endif %}
)
select
*
from
base_data
{% if var('snowplow__channels_to_exclude') and var('snowplow__channels_to_include') %}
-- Filters out any unwanted channels
where channel not in ({{ snowplow_utils.print_list(var('snowplow__channels_to_exclude')) }})
and channel in ({{ snowplow_utils.print_list(var('snowplow__channels_to_include')) }})
{% elif var('snowplow__channels_to_exclude') %}
-- Filters out any unwanted channels
where channel not in ({{ snowplow_utils.print_list(var('snowplow__channels_to_exclude')) }})
{% elif var('snowplow__channels_to_include') %}
-- Filters out any unwanted channels
where channel in ({{ snowplow_utils.print_list(var('snowplow__channels_to_include')) }})
{% endif %}
Depends On
- Macros