Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add table for Android Client LTV #4868

Merged
merged 10 commits into from
Jan 24, 2024
Merged
87 changes: 87 additions & 0 deletions sql/moz-fx-data-shared-prod/fenix/client_ltv/view.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
-- Params Note: Set these same values in fenix.ltv_states
-- max_weeks is passed as an argument to every mozfun.ltv.android_states_* call in this view.
{% set max_weeks = 32 %}
-- death_time is passed only to mozfun.ltv.android_states_with_paid_v2.
{% set death_time = 168 %}
-- lookback is used as both the (negated) offset and the length given to
-- mozfun.bytes.extract_bits on days_seen_bytes when computing activity_pattern.
{% set lookback = 28 %}
Comment on lines +1 to +4
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Optimally we'd share these values between the two models, open to ideas here

-- Per-client ad-click LTV: historic clicks plus the predicted clicks looked up
-- from ltv_state_values_v1 for the client's country, state function and state.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.fenix.client_ltv`
AS
WITH with_activity_bits AS (
  -- Add bit counts derived from days_seen_bytes to every client row.
  SELECT
    *,
    -- Set bits in the lookback-sized slice addressed by a negative offset
    -- (presumably the most recent days; confirm against mozfun.bytes.extract_bits).
    BIT_COUNT(
      `mozfun`.bytes.extract_bits(days_seen_bytes, -1 * {{ lookback }}, {{ lookback }})
    ) AS activity_pattern,
    -- Same extraction restricted to a single bit at offset -1.
    BIT_COUNT(`mozfun`.bytes.extract_bits(days_seen_bytes, -1, 1)) AS active_on_this_date
  FROM
    `moz-fx-data-shared-prod.fenix_derived.client_ltv_v1`
),
with_candidate_states AS (
  -- Compute one candidate state per state function; the final join keeps only
  -- the (country, state_function, state) combinations present in ltv_state_values_v1.
  SELECT
    client_id,
    sample_id,
    as_of_date,
    first_reported_country AS country,
    [
      STRUCT(
        mozfun.ltv.android_states_v1(
          adjust_network,
          days_since_first_seen,
          as_of_date,
          first_seen_date,
          activity_pattern,
          active_on_this_date,
          {{ max_weeks }},
          first_reported_country
        ) AS state,
        'android_states_v1' AS state_function
      ),
      STRUCT(
        mozfun.ltv.android_states_with_paid_v1(
          adjust_network,
          days_since_first_seen,
          as_of_date,
          first_seen_date,
          activity_pattern,
          active_on_this_date,
          {{ max_weeks }},
          first_reported_country
        ) AS state,
        'android_states_with_paid_v1' AS state_function
      ),
      STRUCT(
        mozfun.ltv.android_states_with_paid_v2(
          adjust_network,
          days_since_first_seen,
          days_since_seen,
          {{ death_time }},
          as_of_date,
          first_seen_date,
          activity_pattern,
          active_on_this_date,
          {{ max_weeks }},
          first_reported_country
        ) AS state,
        'android_states_with_paid_v2' AS state_function
      )
    ] AS markov_states,
    * EXCEPT (client_id, sample_id, as_of_date)
  FROM
    with_activity_bits
)
SELECT
  client_id,
  sample_id,
  country,
  COALESCE(total_historic_ad_clicks, 0) AS total_historic_ad_clicks,
  COALESCE(predicted_ad_clicks, 0) AS total_future_ad_clicks,
  COALESCE(total_historic_ad_clicks, 0) + COALESCE(predicted_ad_clicks, 0) AS total_predicted_ad_clicks
FROM
  with_candidate_states
CROSS JOIN
  UNNEST(markov_states)
INNER JOIN
  `moz-fx-data-shared-prod`.fenix_derived.ltv_state_values_v1
  USING (country, state_function, state)
7 changes: 7 additions & 0 deletions sql/moz-fx-data-shared-prod/fenix/ltv_state_values/view.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- User-facing pass-through view over the derived ltv_state_values_v1 table.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.fenix.ltv_state_values`
AS
SELECT
  *
FROM
  `moz-fx-data-shared-prod`.fenix_derived.ltv_state_values_v1
1 change: 1 addition & 0 deletions sql/moz-fx-data-shared-prod/fenix/ltv_states/view.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
-- Params Note: Set these same values in fenix.client_ltv
{% set max_weeks = 32 %}
{% set death_time = 168 %}
{% set lookback = 28 %}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#fail
-- Runs the is_unique check macro on client_id.
{{ is_unique("client_id") }}

#fail
-- Runs the min_row_count check macro with a threshold of 10000, restricted to
-- rows where as_of_date equals the @submission_date parameter.
{{ min_row_count(10000, "as_of_date = @submission_date") }}

Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Table metadata: ownership, scheduling, storage layout and access.
friendly_name: Client LTV States
description: |-
LTV states by client, rather than by client-day.
Uses the most recently seen values for a client.
LTV here is number of ad-clicks.
owners:
- [email protected]
labels:
incremental: true
owner1: [email protected]
# Each run depends on the previous one (depends_on_past: true). The date is
# supplied as an explicit submission_date query parameter rather than through
# a partition parameter (date_partition_parameter is null).
scheduling:
depends_on_past: true
dag_name: bqetl_org_mozilla_firefox_derived
date_partition_parameter: null
parameters:
- "submission_date:DATE:{{ds}}"
# Unpartitioned table, clustered by sample_id and first_reported_country.
bigquery:
time_partitioning: null
clustering:
fields:
- sample_id
- first_reported_country
# Read access (roles/bigquery.dataViewer) is granted to the
# mozilla-confidential workgroup.
workgroup_access:
- role: roles/bigquery.dataViewer
members:
- workgroup:mozilla-confidential
references: {}
deprecated: false
41 changes: 41 additions & 0 deletions sql/moz-fx-data-shared-prod/fenix_derived/client_ltv_v1/query.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
-- Maintains one row per client: the values from the client's most recent
-- submission_date, merged with the rows already stored in client_ltv_v1.
WITH latest_states AS (
  -- Newest ltv_states_v1 row per client within the date range being processed
  -- (everything since 2020-01-01 on init, otherwise a single submission_date).
  SELECT
    client_id,
    sample_id,
    MAX(submission_date) AS as_of_date,
    MAX_BY(ltv_states_v1, submission_date).* EXCEPT (client_id, sample_id, submission_date)
  FROM
    `moz-fx-data-shared-prod`.fenix_derived.ltv_states_v1
  WHERE
    {% if is_init() %}
      submission_date >= "2020-01-01"
    {% else %}
      submission_date = @submission_date
    {% endif %}
  GROUP BY
    client_id,
    sample_id
),
existing_rows AS (
  -- Current contents of the destination table.
  SELECT
    *
  FROM
    fenix_derived.client_ltv_v1
)
SELECT
  (
    CASE
      -- Client absent from the new data: keep the stored row.
      WHEN latest_states.as_of_date IS NULL
        THEN existing_rows
      -- Stored row exists and is at least as recent: keep it (ties favor the stored row).
      WHEN existing_rows.as_of_date IS NOT NULL
        AND latest_states.as_of_date <= existing_rows.as_of_date
        THEN existing_rows
      -- Otherwise the client is new or has strictly newer data.
      ELSE latest_states
    END
  ).*
FROM
  existing_rows
FULL OUTER JOIN
  latest_states
  USING (sample_id, client_id)
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Column schema for the per-client LTV table (one row per client).
fields:
- name: client_id
  mode: NULLABLE
  type: STRING
  description: "Client ID; uniquely identifies a client. Joinable with fenix.firefox_android_clients."
- name: sample_id
  mode: NULLABLE
  type: INTEGER
  description: "A number, 0-99, that samples by client_id."
- name: as_of_date
  mode: NULLABLE
  type: DATE
  description: >
    The date from which the markov states are pulled.
    After a year of inactivity, the markov states will stop updating;
    similarly, the as_of_date will not update.
- name: first_seen_date
  mode: NULLABLE
  type: DATE
  description: "First submission date that this client was seen on."
- name: days_since_first_seen
  mode: NULLABLE
  type: INTEGER
  description: "Number of days since this client was first seen."
- name: days_since_seen
  mode: NULLABLE
  type: INTEGER
  description: "Number of days since this client was last seen. For example, if they were seen yesterday, days_since_seen would be 1."
- name: consecutive_days_seen
  mode: NULLABLE
  type: INTEGER
  description: >
    Number of consecutive days this client has been seen.
    For example, if they were missing two days ago but present yesterday & today, consecutive_days_seen would be 2.
- name: days_seen_bytes
  mode: NULLABLE
  type: BYTES
  description: "Days seen over the past year, represented as bytes."
- name: ad_clicks_on_date
  mode: NULLABLE
  type: INTEGER
  description: "Number of ad clicks by this client on this submission date."
- name: total_historic_ad_clicks
  mode: NULLABLE
  type: INTEGER
  description: "Total historic ad clicks by this client up to this date (inclusive of this date)."
- name: first_reported_country
  mode: NULLABLE
  type: STRING
  description: "First country reported by this client."
- name: first_reported_isp
  mode: NULLABLE
  type: STRING
  description: "First ISP reported by this client."
- name: adjust_network
  mode: NULLABLE
  type: STRING
  description: "First Adjust Network reported by this client."
- name: install_source
  mode: NULLABLE
  type: STRING
  description: "First install source reported by this client."
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#fail
-- Runs the is_unique check macro on the (country, state) pair.
{{ is_unique(["country", "state"]) }}

#fail
-- Runs the min_row_count check macro with a threshold of 1000.
{{ min_row_count(1000) }}

#fail
-- Each country should have a single state function
SELECT
  mozfun.assert.equals(1, COUNT(DISTINCT state_function))
FROM
  fenix_derived.ltv_state_values_v1
GROUP BY
  country;

#fail
-- There should be more than 2 countries present
SELECT
  `mozfun.assert.true`(COUNT(DISTINCT country) > 2)
FROM
  fenix_derived.ltv_state_values_v1;
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Table metadata: ownership, scheduling and storage layout.
friendly_name: Ltv State Values
description: |-
Android state values, in terms of Ad Clicks.
Each country has their own ad click LTV for each state.
owners:
- [email protected]
labels:
incremental: false
owner1: [email protected]
# Non-incremental: runs do not depend on previous runs and no date partition
# parameter is used.
scheduling:
dag_name: bqetl_org_mozilla_firefox_derived
depends_on_past: false
date_partition_parameter: null
# Unpartitioned table, clustered by country.
bigquery:
time_partitioning: null
clustering:
fields: [country]
references: {}
deprecated: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
-- Renames the columns of the analysis LTV table and tags each row with the
-- state function assigned to its country.
SELECT
  state,
  ltv AS predicted_ad_clicks,
  t AS time_horizon,
  country,
  -- Countries in neither list get a NULL state_function (CASE without ELSE).
  CASE
    WHEN country IN (
        'IN', 'US', 'CA', 'DE', 'BE', 'FR', 'GB',
        'CH', 'NL', 'ES', 'AT', 'MX', 'PL', 'IT'
      )
      THEN 'android_states_with_paid_v1'
    WHEN country IN ('BR', 'KE', 'AU', 'JP')
      THEN 'android_states_v1'
  END AS state_function
FROM
  mozdata.analysis.android_state_ltvs_v1
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Column schema for the LTV state values table: one predicted ad-click value
# per state, together with its country, time horizon and state function.
fields:
- name: state
type: STRING
mode: NULLABLE
description: "The markov state, as determined by the state function."
- name: predicted_ad_clicks
type: FLOAT64
mode: NULLABLE
description: "Number of predicted ad clicks for the specified time horizon."
- name: time_horizon
type: INTEGER
mode: NULLABLE
description: "Number of days into the future that ad clicks are being predicted for."
- name: country
type: STRING
mode: NULLABLE
description: "The country that this prediction is specified for."
- name: state_function
type: STRING
mode: NULLABLE
description: "The state function used to determine the LTV for this state."