Skip to main content

Snowplow Fractribution Models

caution

This page is auto-generated from our dbt packages, some information may be incomplete

Snowplow Fractributionโ€‹

Snowplow Fractribution Call Snowpark Macrosโ€‹

models/snowplow_fractribution_call_snowpark_macros.sql

Description

Model to trigger the Snowpark macros for producing the 3 attribution model output tables

Type: Table

Details

Columns
Column NameDescriptionType
schema_nametext
table_nametext
last_run_timetext
Code
Source
--This model is only used when the attribution package is run on Snowflake and the Python script will be run using Snowpark, rather than manually.
{{ config(
enabled = target.type == "snowflake" and var('snowplow__run_python_script_in_snowpark', false),
materialized = 'table',
)}}

-- depends_on: {{ref('snowplow_fractribution_path_summary')}}
-- depends_on: {{ref('snowplow_fractribution_channel_spend')}}

{{create_report_table_proc()}}
{{run_stored_procedure(var('snowplow__attribution_model_for_snowpark'), var('snowplow__conversion_window_start_date') , var('snowplow__conversion_window_end_date') )}}
With table_1 as (
SELECT
'{{schema}}' as schema_name,
'snowplow_fractribution_path_summary_with_channels' as table_name,
'{{dbt_utils.pretty_time(format="%Y-%m-%d %H:%M:%S")}}' as last_run_time
),
table_2 as (
SELECT
'{{schema}}' as schema_name,
'snowplow_fractribution_channel_attribution' as table_name,
'{{dbt_utils.pretty_time(format="%Y-%m-%d %H:%M:%S")}}' as last_run_time
),
table_3 as (
SELECT
'{{schema}}' as schema_name,
'snowplow_fractribution_report_table' as table_name,
'{{dbt_utils.pretty_time(format="%Y-%m-%d %H:%M:%S")}}' as last_run_time
)
SELECT * FROM table_1
UNION ALL
SELECT * FROM table_2
UNION ALL
SELECT * FROM table_3


{%- if execute %}
{{ log('Finished running stored procedure. Created tables:
snowplow_fractribution_path_summary_with_channels
snowplow_fractribution_channel_attribution
snowplow_fractribution_report_table', info=True)}}
{% endif %}

Depends On

Snowplow Fractribution Channel Countsโ€‹

models/snowplow_fractribution_channel_counts.sql

Description

Number of sessions per channel, campaign, source and medium

Type: Table

Details

Columns
Column NameDescriptionType
channelChannel nametext
campaignCampaign nametext
sourceSource / referring hosttext
mediumMarketing mediumtext
number_of_sessionsCount of sessions per channel / campaign / source / mediumnumber
Code
Source
{{
config(
sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt'))
)
}}

select
channel,
campaign,
source,
medium,
count(*) as number_of_sessions

from {{ ref('snowplow_fractribution_sessions_by_customer_id') }}

group by 1,2,3,4

order by channel, number_of_sessions desc

Depends On

Referenced By

Snowplow Fractribution Channel Spendโ€‹

models/snowplow_fractribution_channel_spend.sql

Description

This model does not currently have a description.

Type: Table

Details

Columns
Column NameDescriptionType
channeltext
spendnumber
Code
Source
{{ 
config(
sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt'))
)
}}

-- By default, the model assigns an example 10k spend to each channel found in channel_counts
-- TODO: put in your own spend calculations per channel in the channel_spend macro in your own dbt project


{{ channel_spend() }}

Depends On

Referenced By

Snowplow Fractribution Conversions By Customer Idโ€‹

models/snowplow_fractribution_conversions_by_customer_id.sql

Description

Each conversion and associated revenue per customer_id

Type: Table

Details

Columns
Column NameDescriptionType
customer_idIdentifier for the customer, 'f' prefixed when domain_userid is used, 'u' prefixed for when user_id is used (logged in?)text
conversion_tstamptimestamp_ntz
revenueRevenue (dollars / cents) for the conversionnumber
Code
Source
{{
config(
sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt'))
)
}}

select
case when events.user_id is not null and events.user_id != '' then 'u' || events.user_id -- use event user_id
{% if var('snowplow__use_snowplow_web_user_mapping_table') %}
when user_mapping.domain_userid is not null then 'u' || user_mapping.user_id
{% endif %}
else 'f' || events.domain_userid
end as customer_id,
derived_tstamp as conversion_tstamp,
{{ conversion_value() }} as revenue

from {{ var('snowplow__conversions_source' )}} as events

{% if var('snowplow__use_snowplow_web_user_mapping_table') %}
left join {{ var('snowplow__web_user_mapping_table') }} as user_mapping
on events.domain_userid = user_mapping.domain_userid
{% endif %}

where {{ conversion_clause() }}
and date(derived_tstamp) >= '{{ get_lookback_date_limits("min", "conversions") }}'
and date(derived_tstamp) <= '{{ get_lookback_date_limits("max", "conversions") }}'

{% if var('snowplow__conversions_source_filter') != '' %}
and date({{ var('snowplow__conversions_source_filter') }}) >= {{ dateadd('day',-var('snowplow__conversions_source_filter_buffer_days'), "'"~get_lookback_date_limits('min', 'conversions')~"'") }}
and date({{ var('snowplow__conversions_source_filter') }}) <= {{ dateadd('day', var('snowplow__conversions_source_filter_buffer_days'),"'"~get_lookback_date_limits('max', 'conversions')~"'") }}
{% endif %}

Depends On

Referenced By

Snowplow Fractribution Path Summaryโ€‹

models/snowplow_fractribution_path_summary.sql

Description

For each unique path, a summary of associated conversions, non conversions and revenue

Type: Table

Details

Columns
Column NameDescriptionType
transformed_path> delimited path summarytext
conversionsCount of conversions for this pathnumber
non_conversionsCount of non-conversions for pathnumber
revenueRevenue for the given pathnumber
Code
Source
{{
config(
sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt'))
)
}}

with paths_to_conversion as (

select
transformed_path,
count(*) as conversions,
sum(revenue) as revenue

from {{ ref('snowplow_fractribution_paths_to_conversion') }}

group by 1

)

, paths_to_non_conversion as (

select
transformed_path,
count(*) as non_conversions

from {{ ref('snowplow_fractribution_paths_to_non_conversion') }}

group by 1
)

select
coalesce(c.transformed_path, n.transformed_path) as transformed_path,
coalesce(c.conversions, 0) as conversions,
coalesce(n.non_conversions, 0) as non_conversions,
c.revenue

from paths_to_conversion c

full join paths_to_non_conversion n
on c.transformed_path = n.transformed_path

Depends On

Referenced By

Snowplow Fractribution Paths To Conversionโ€‹

models/snowplow_fractribution_paths_to_conversion.sql

Description

Customer id and the the paths the customer has followed that have lead to conversion

Type: Table

Details

Columns
Column NameDescriptionType
customer_idId for the customer (identified or cookie)text
conversion_tstamptimestamp_ntz
revenueRevenue associated with the conversionnumber
pathPath to conversion (> delimited)text
transformed_pathTransformations applied to "path" abovetext
Code
Source
{{
config(
sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt'))
)
}}

-- Requires macro trim_long_path

with string_aggs as (

select
c.customer_id,
c.conversion_tstamp,
c.revenue,
{{ snowplow_utils.get_string_agg('channel', 's', separator=' > ', sort_numeric=false, order_by_column='visit_start_tstamp', order_by_column_prefix='s') }} as path

from {{ ref('snowplow_fractribution_conversions_by_customer_id') }} c

inner join {{ ref('snowplow_fractribution_sessions_by_customer_id') }} s
on c.customer_id = s.customer_id
and {{ datediff('s.visit_start_tstamp', 'c.conversion_tstamp', 'day') }} >= 0
and {{ datediff('s.visit_start_tstamp', 'c.conversion_tstamp', 'day') }} <= {{ var('snowplow__path_lookback_days') }}

group by 1,2,3

)

{% if target.type not in ('redshift') %}

, arrays as (

select
customer_id,
conversion_tstamp,
revenue,
{{ snowplow_utils.get_split_to_array('path', 's', ' > ') }} as path,
{{ snowplow_utils.get_split_to_array('path', 's', ' > ') }} as transformed_path


from string_aggs s

)

{{ transform_paths('conversions', 'arrays') }}

select
customer_id,
conversion_tstamp,
revenue,
{{ snowplow_utils.get_array_to_string('path', 'p', ' > ') }} as path,
{{ snowplow_utils.get_array_to_string('transformed_path', 'p', ' > ') }} as transformed_path

from path_transforms p

{% else %}

, strings as (

select
customer_id,
conversion_tstamp,
revenue,
path as path,
path as transformed_path

from string_aggs s

)

{{ transform_paths('conversions', 'strings') }}


select *
from path_transforms p


{% endif %}

Depends On

Referenced By

Snowplow Fractribution Paths To Non Conversionโ€‹

models/snowplow_fractribution_paths_to_non_conversion.sql

Description

Customer id and the the paths the customer has followed that have not lead to conversion

Type: Table

Details

Columns
Column NameDescriptionType
customer_idId for the customer (identified or cookie)text
pathPath to conversion (> delimited)text
transformed_pathTransformations applied to "path" abovetext
Code
Source
{{
config(
sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt'))
)
}}

-- Requires macro trim_long_path


with non_conversions as (

select
customer_id,
max(visit_start_tstamp) as non_conversion_tstamp

from {{ ref('snowplow_fractribution_sessions_by_customer_id') }} s

where not exists (select customer_id from {{ ref('snowplow_fractribution_conversions_by_customer_id') }} c where s.customer_id = c.customer_id)

group by 1

)

, string_aggs as (

select
n.customer_id,
{{ snowplow_utils.get_string_agg('channel', 's', separator=' > ', order_by_column='visit_start_tstamp', sort_numeric=false, order_by_column_prefix='s') }} as path

from non_conversions n

inner join {{ ref('snowplow_fractribution_sessions_by_customer_id') }} s
on n.customer_id = s.customer_id
and {{ datediff('s.visit_start_tstamp', 'n.non_conversion_tstamp', 'day') }} >= 0
and {{ datediff('s.visit_start_tstamp', 'n.non_conversion_tstamp', 'day') }} <= {{ var('snowplow__path_lookback_days') }}

group by 1


)

{% if target.type not in ('redshift') %}

, arrays as (

select
customer_id,
{{ snowplow_utils.get_split_to_array('path', 's', ' > ') }} as path,
{{ snowplow_utils.get_split_to_array('path', 's', ' > ') }} as transformed_path

from string_aggs s

)

{{ transform_paths('non_conversions', 'arrays') }}

select
customer_id,
{{ snowplow_utils.get_array_to_string('path', 'p', ' > ') }} as path,
{{ snowplow_utils.get_array_to_string('transformed_path', 'p', ' > ') }} as transformed_path

from path_transforms p

{% else %}

, strings as (

select
customer_id,
path as path,
path as transformed_path

from string_aggs s

)

{{ transform_paths('non_conversions', 'strings') }}


select *
from path_transforms p

{% endif %}

Depends On

Referenced By

Snowplow Fractribution Sessions By Customer Idโ€‹

models/snowplow_fractribution_sessions_by_customer_id.sql

Description

Channels per session by customer id, yields one row per session unless snowplow__consider_intrasession_channels is true

Type: Table

Details

Columns
Column NameDescriptionType
customer_idCustomer idtext
visit_start_tstampUTC timestamp for the start of the sessiontimestamp_ntz
channelChanneltext
referral_pathReferall path for the sessiontext
campaignMarketing campaigntext
sourceMarketing sourcetext
mediumMarketing mediumtext
Code
Source
{{
config(
sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt'))
)
}}

-- restrict to certain hostnames
{% if var('snowplow__conversion_hosts') in ('', [], '[]') or var('snowplow__conversion_hosts') == None %}
{{ exceptions.raise_compiler_error("Error: var('conversion_host') needs to be set!") }}
{% endif %}

with base_data as (
select
case when page_views.user_id is not null and page_views.user_id != '' then 'u' || page_views.user_id -- use event user_id
{% if var('snowplow__use_snowplow_web_user_mapping_table') %}
when user_mapping.domain_userid is not null then 'u' || user_mapping.user_id
{% endif %}
else 'f' || page_views.domain_userid
end as customer_id, -- f (anonymous) or u (identifier) prefixed user identifier
derived_tstamp as visit_start_tstamp, -- we consider the event timestamp to be the session start, rather than the session start timestamp
{{ channel_classification() }} as channel,
refr_urlpath as referral_path,
mkt_campaign as campaign,
mkt_source as source,
mkt_medium as medium

from {{ var('snowplow__page_views_source') }} page_views

{% if var('snowplow__use_snowplow_web_user_mapping_table') %}
left join {{ var('snowplow__web_user_mapping_table') }} as user_mapping
on page_views.domain_userid = user_mapping.domain_userid
{% endif %}

where date(derived_tstamp) >= '{{ get_lookback_date_limits("min", "sessions") }}'

and date(derived_tstamp) <= '{{ get_lookback_date_limits("max", "sessions") }}'

and
-- restrict to certain hostnames
{% if var('snowplow__conversion_hosts') in ('', [], '[]') or var('snowplow__conversion_hosts') == None %}
{{ exceptions.raise_compiler_error("Error: var('conversion_host') needs to be set!") }}

{% endif %}
page_urlhost in ({{ snowplow_utils.print_list(var('snowplow__conversion_hosts')) }})

{% if var('snowplow__consider_intrasession_channels') %}
-- yields one row per channel change
and mkt_medium is not null and mkt_medium != ''

{% else %}
-- yields one row per session (last touch)
and page_view_in_session_index = 1 -- takes the first page view in the session
{% endif %}
)

select
*
from
base_data
{% if var('snowplow__channels_to_exclude') and var('snowplow__channels_to_include') %}
-- Filters out any unwanted channels
where channel not in ({{ snowplow_utils.print_list(var('snowplow__channels_to_exclude')) }})
and channel in ({{ snowplow_utils.print_list(var('snowplow__channels_to_include')) }})

{% elif var('snowplow__channels_to_exclude') %}
-- Filters out any unwanted channels
where channel not in ({{ snowplow_utils.print_list(var('snowplow__channels_to_exclude')) }})

{% elif var('snowplow__channels_to_include') %}
-- Filters out any unwanted channels
where channel in ({{ snowplow_utils.print_list(var('snowplow__channels_to_include')) }})

{% endif %}

Depends On

Referenced By

Was this page helpful?