· 7 years ago · Jan 04, 2019, 08:20 PM
1drop table cshibe.euphoriainstitute_enroll;
2drop table cshibe.learnatlincolnenroll;
3drop table cshibe.linceduenroll;
4drop table cshibe.lincolntech_usaenroll;
5drop table cshibe.lincolntechenroll;
6drop table cshibe.lincolnculinaryinstituteenroll;
7drop table cshibe.overall_enroll;
8--A modification was made in early Sept to add month to the merge criteria to the part of the query that we "Identify any advertiser visit where the referrer was a 3rd party source URL"
9--This was to solve the issue of how a new url getting audited could cause historical lead sources to change from being a non-direct to a direct referrer
10
11DROP TABLE IF EXISTS parameters;
12
13-- Table to hold parameters
14-- Note: passing NULL for advertiser_url_base_domain means we will not filter for a specific advertiser URL base domain)
15CREATE TEMPORARY TABLE IF NOT EXISTS parameters(
16 account VARCHAR(MAX),
17 start_date TIMESTAMP,
18 end_date TIMESTAMP,
19 advertiser_url_base_domain VARCHAR(512)
20);
21
22PREPARE insert_parameters(CHAR, TIMESTAMP, TIMESTAMP, CHAR) AS
23INSERT INTO parameters(account, start_date, end_date, advertiser_url_base_domain)
24VALUES($1, $2, $3, $4);
25
26EXECUTE insert_parameters('Lincoln Technical Institute, Inc.', '2018-05-01', '2018-09-01', 'learnatlincoln.com');
27
28DEALLOCATE insert_parameters;
29
30DROP TABLE IF EXISTS advertiser_visits;
31
32-- Get advertiser visits
33-- Execution time: 3m 6s
34SELECT
35 l.token,
36 d.uuid did,
37 (inquiry.token IS NOT NULL) :: SMALLINT inquiry,
38 TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND' visit_timestamp,
39 (l.token = FIRST_VALUE(l.token) OVER(PARTITION BY d.uuid ORDER BY l.created ROWS UNBOUNDED PRECEDING)) :: SMALLINT first_visit,
40 ISNULL(((inquiry.token = FIRST_VALUE(l.token) OVER(PARTITION BY CASE WHEN inquiry.token IS NOT NULL THEN d.uuid ELSE NULL END ORDER BY l.created ROWS UNBOUNDED PRECEDING)) :: SMALLINT), 0) first_inquiry,
41 (direct_referrer.url_domain IS NOT NULL) :: SMALLINT direct_referrer,
42 ad.attribute_07 as enroll
43INTO TEMP advertiser_visits
44FROM all_leads l
45 INNER JOIN all_urls u ON u.token = l.token
46 LEFT OUTER JOIN audits_deduped ad ON ad.token = l.token AND ad.account_code = l.account_code
47 LEFT OUTER JOIN deviceid d ON d.token = l.token AND d.methods IN (1, 3)
48 LEFT OUTER JOIN (
49 SELECT token
50 FROM consumer_frequency
51 WHERE created >= EXTRACT(EPOCH FROM (SELECT start_date FROM parameters)) AND
52 created < EXTRACT(EPOCH FROM (SELECT end_date FROM parameters))
53 GROUP BY token
54 ) inquiry ON inquiry.token = l.token
55
56 -- Identify any advertiser visit where the referrer was a 3rd party source URL
57 LEFT OUTER JOIN (
58 SELECT REGEXP_SUBSTR(url_domain, '([^.]+)\.[^.]+$') url_domain, date_trunc('month', lead_created_timestamp) created_month
59 FROM audits
60 WHERE account = (SELECT account FROM parameters) AND
61 -- audit_timestamp_day >= DATEADD(MONTH, -1, (SELECT start_date FROM parameters)) AND
62 audit_timestamp >= (SELECT start_date - INTERVAL '30 DAY' FROM parameters) AND
63 audit_timestamp < (SELECT end_date FROM parameters) AND
64 call_center IS NULL AND
65 type = 1 AND
66 authentic AND
67 COALESCE(url_domain, '') <> '' AND
68 provider IS NOT NULL AND
69 creator_account_code IS NOT NULL
70 GROUP BY 1, 2
71 ) direct_referrer ON direct_referrer.url_domain = REGEXP_SUBSTR(u.referrer_host, '([^.]+)\.[^.]+$') and direct_referrer.created_month = date_trunc('month', TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND')
72WHERE l.account_code = (SELECT code FROM accounts WHERE name = (SELECT account FROM parameters)) AND
73 l.created >= EXTRACT(EPOCH FROM (SELECT start_date FROM parameters)) AND
74 l.created < EXTRACT(EPOCH FROM (SELECT end_date FROM parameters)) AND
75 REGEXP_SUBSTR(SPLIT_PART(u.landing_page_url, '/', 1), '([^.]+)\.[^.]+$') = COALESCE((SELECT advertiser_url_base_domain FROM parameters), REGEXP_SUBSTR(SPLIT_PART(u.landing_page_url, '/', 1), '([^.]+)\.[^.]+$'));
76
77-- Get publisher visits
78-- Execution time: 4m 2s
79DROP TABLE IF EXISTS publisher_visits;
80
81SELECT
82 l.token,
83 u.landing_page_url url_domain,
84 d.uuid did,
85 TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND' visit_timestamp,
86 COALESCE(
87 LAG(TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND', 1)
88 OVER(PARTITION BY did ORDER BY VISIT_TIMESTAMP DESC),
89 TIMESTAMP '3000-01-01'
90 ) next_timestamp
91INTO TEMP publisher_visits
92FROM all_leads l
93 INNER JOIN deviceid d ON d.token = l.token
94 INNER JOIN all_urls u ON u.token = l.token
95WHERE l.account_code <> (SELECT code FROM accounts WHERE name = (SELECT account FROM parameters)) AND
96 l.created >= EXTRACT(EPOCH FROM (SELECT start_date - INTERVAL '30 DAY' FROM parameters)) AND
97 l.created < EXTRACT(EPOCH FROM (SELECT end_date FROM parameters)) AND
98 d.methods IN (1, 3) AND
99 u.landing_page_url IN(
100 SELECT url_domain
101 FROM audits
102 WHERE account = (SELECT account FROM parameters) AND
103 audit_timestamp >= (SELECT start_date - INTERVAL '30 DAY' FROM parameters) AND
104 audit_timestamp < (SELECT end_date FROM parameters) AND
105 call_center IS NULL AND
106 type = 1 AND
107 authentic AND
108 COALESCE(url_domain, '') <> '' AND
109 provider IS NOT NULL AND
110 creator_account_code IS NOT NULL
111 GROUP BY 1
112 );
113
114-- Produce attribution report
115-- Execution time: 16m 23s
116SELECT
117 COALESCE(url_provider.provider, 'None') provider,
118 CASE WHEN "direct" = 1 THEN 'Direct' WHEN "direct" = 0 THEN 'Aggregated' ELSE 'N/A' END direct_vs_agg,
119 COUNT(*) visits,
120 SUM(inquiry) inquiries,
121 SUM(enroll) as enrolls,
122 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_visit",
123 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_inquiry",
124 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_1st_visit",
125 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_1st_inquiry",
126 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_visit",
127 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_inquiry",
128 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_1st_visit",
129 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_1st_inquiry",
130 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_visit",
131 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_inquiry",
132 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_1st_visit",
133 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_1st_inquiry",
134 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_visit",
135 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_inquiry",
136 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_1st_visit",
137 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_1st_inquiry",
138 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_visit",
139 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_inquiry",
140 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_1st_visit",
141 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_1st_inquiry"
142 INTO cshibe.learnatlincolnenroll
143FROM advertiser_visits a
144 LEFT OUTER JOIN publisher_visits p ON p.did = a.did AND
145 p.visit_timestamp < a.visit_timestamp AND
146 p.next_timestamp >= a.visit_timestamp AND
147 a.direct_referrer = 0
148 LEFT OUTER JOIN (
149 SELECT url_domain, provider, "direct"
150 FROM (
151 SELECT
152 url_domain,
153 provider,
154 "direct",
155 weighted_pct = MAX(weighted_pct) OVER(PARTITION BY url_domain) is_url_provider
156 FROM (
157 SELECT
158 url_domain,
159 provider,
160 (accounts.entity_code = provider_code) :: SMALLINT "direct",
161 ((accounts.entity_code = provider_code) :: SMALLINT)
162 + SUM(1.0) / SUM(COUNT(*)) OVER(PARTITION BY url_domain)
163 + (1.0 / STRTOL(CRC32(provider), 16)) weighted_pct
164 FROM audits
165 INNER JOIN accounts ON accounts.code = audits.creator_account_code
166 WHERE account = (SELECT account FROM parameters) AND
167 audit_timestamp >= (SELECT start_date - INTERVAL '30 DAY' FROM parameters) AND
168 audit_timestamp < (SELECT end_date FROM parameters) AND
169 call_center IS NULL AND
170 type = 1 AND
171 authentic AND
172 COALESCE(url_domain, '') <> '' AND
173 provider IS NOT NULL AND
174 creator_account_code IS NOT NULL
175 GROUP BY url_domain, provider, accounts.entity_code, provider_code, "direct"
176 )
177 GROUP BY
178 url_domain,
179 provider,
180 "direct",
181 weighted_pct
182 )
183 WHERE is_url_provider
184 ) url_provider ON url_provider.url_domain = p.url_domain
185GROUP BY 1, 2;
186
187
188
189--A modification was made in early Sept to add month to the merge criteria to the part of the query that we "Identify any advertiser visit where the referrer was a 3rd party source URL"
190--This was to solve the issue of how a new url getting audited could cause historical lead sources to change from being a non-direct to a direct referrer
191
192DROP TABLE IF EXISTS parameters;
193
194-- Table to hold parameters
195-- Note: passing NULL for advertiser_url_base_domain means we will not filter for a specific advertiser URL base domain)
196CREATE TEMPORARY TABLE IF NOT EXISTS parameters(
197 account VARCHAR(MAX),
198 start_date TIMESTAMP,
199 end_date TIMESTAMP,
200 advertiser_url_base_domain VARCHAR(512)
201);
202
203PREPARE insert_parameters(CHAR, TIMESTAMP, TIMESTAMP, CHAR) AS
204INSERT INTO parameters(account, start_date, end_date, advertiser_url_base_domain)
205VALUES($1, $2, $3, $4);
206
207EXECUTE insert_parameters('Lincoln Technical Institute, Inc.', '2018-05-01', '2018-09-01', 'lincedu.com');
208
209DEALLOCATE insert_parameters;
210
211DROP TABLE IF EXISTS advertiser_visits;
212
213-- Get advertiser visits
214-- Execution time: 3m 6s
215SELECT
216 l.token,
217 d.uuid did,
218 (inquiry.token IS NOT NULL) :: SMALLINT inquiry,
219 TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND' visit_timestamp,
220 (l.token = FIRST_VALUE(l.token) OVER(PARTITION BY d.uuid ORDER BY l.created ROWS UNBOUNDED PRECEDING)) :: SMALLINT first_visit,
221 ISNULL(((inquiry.token = FIRST_VALUE(l.token) OVER(PARTITION BY CASE WHEN inquiry.token IS NOT NULL THEN d.uuid ELSE NULL END ORDER BY l.created ROWS UNBOUNDED PRECEDING)) :: SMALLINT), 0) first_inquiry,
222 (direct_referrer.url_domain IS NOT NULL) :: SMALLINT direct_referrer,
223 ad.attribute_07 as enroll
224INTO TEMP advertiser_visits
225FROM all_leads l
226 INNER JOIN all_urls u ON u.token = l.token
227 LEFT OUTER JOIN audits_deduped ad ON ad.token = l.token AND ad.account_code = l.account_code
228 LEFT OUTER JOIN deviceid d ON d.token = l.token AND d.methods IN (1, 3)
229 LEFT OUTER JOIN (
230 SELECT token
231 FROM consumer_frequency
232 WHERE created >= EXTRACT(EPOCH FROM (SELECT start_date FROM parameters)) AND
233 created < EXTRACT(EPOCH FROM (SELECT end_date FROM parameters))
234 GROUP BY token
235 ) inquiry ON inquiry.token = l.token
236
237 -- Identify any advertiser visit where the referrer was a 3rd party source URL
238 LEFT OUTER JOIN (
239 SELECT REGEXP_SUBSTR(url_domain, '([^.]+)\.[^.]+$') url_domain, date_trunc('month', lead_created_timestamp) created_month
240 FROM audits
241 WHERE account = (SELECT account FROM parameters) AND
242 -- audit_timestamp_day >= DATEADD(MONTH, -1, (SELECT start_date FROM parameters)) AND
243 audit_timestamp >= (SELECT start_date - INTERVAL '30 DAY' FROM parameters) AND
244 audit_timestamp < (SELECT end_date FROM parameters) AND
245 call_center IS NULL AND
246 type = 1 AND
247 authentic AND
248 COALESCE(url_domain, '') <> '' AND
249 provider IS NOT NULL AND
250 creator_account_code IS NOT NULL
251 GROUP BY 1, 2
252 ) direct_referrer ON direct_referrer.url_domain = REGEXP_SUBSTR(u.referrer_host, '([^.]+)\.[^.]+$') and direct_referrer.created_month = date_trunc('month', TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND')
253WHERE l.account_code = (SELECT code FROM accounts WHERE name = (SELECT account FROM parameters)) AND
254 l.created >= EXTRACT(EPOCH FROM (SELECT start_date FROM parameters)) AND
255 l.created < EXTRACT(EPOCH FROM (SELECT end_date FROM parameters)) AND
256 REGEXP_SUBSTR(SPLIT_PART(u.landing_page_url, '/', 1), '([^.]+)\.[^.]+$') = COALESCE((SELECT advertiser_url_base_domain FROM parameters), REGEXP_SUBSTR(SPLIT_PART(u.landing_page_url, '/', 1), '([^.]+)\.[^.]+$'));
257
258-- Get publisher visits
259-- Execution time: 4m 2s
260DROP TABLE IF EXISTS publisher_visits;
261
262SELECT
263 l.token,
264 u.landing_page_url url_domain,
265 d.uuid did,
266 TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND' visit_timestamp,
267 COALESCE(
268 LAG(TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND', 1)
269 OVER(PARTITION BY did ORDER BY VISIT_TIMESTAMP DESC),
270 TIMESTAMP '3000-01-01'
271 ) next_timestamp
272INTO TEMP publisher_visits
273FROM all_leads l
274 INNER JOIN deviceid d ON d.token = l.token
275 INNER JOIN all_urls u ON u.token = l.token
276WHERE l.account_code <> (SELECT code FROM accounts WHERE name = (SELECT account FROM parameters)) AND
277 l.created >= EXTRACT(EPOCH FROM (SELECT start_date - INTERVAL '30 DAY' FROM parameters)) AND
278 l.created < EXTRACT(EPOCH FROM (SELECT end_date FROM parameters)) AND
279 d.methods IN (1, 3) AND
280 u.landing_page_url IN(
281 SELECT url_domain
282 FROM audits
283 WHERE account = (SELECT account FROM parameters) AND
284 audit_timestamp >= (SELECT start_date - INTERVAL '30 DAY' FROM parameters) AND
285 audit_timestamp < (SELECT end_date FROM parameters) AND
286 call_center IS NULL AND
287 type = 1 AND
288 authentic AND
289 COALESCE(url_domain, '') <> '' AND
290 provider IS NOT NULL AND
291 creator_account_code IS NOT NULL
292 GROUP BY 1
293 );
294
295-- Produce attribution report
296-- Execution time: 16m 23s
297SELECT
298 COALESCE(url_provider.provider, 'None') provider,
299 CASE WHEN "direct" = 1 THEN 'Direct' WHEN "direct" = 0 THEN 'Aggregated' ELSE 'N/A' END direct_vs_agg,
300 COUNT(*) visits,
301 SUM(inquiry) inquiries,
302 SUM(enroll) as enrolls,
303 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_visit",
304 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_inquiry",
305 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_1st_visit",
306 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_1st_inquiry",
307 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_visit",
308 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_inquiry",
309 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_1st_visit",
310 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_1st_inquiry",
311 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_visit",
312 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_inquiry",
313 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_1st_visit",
314 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_1st_inquiry",
315 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_visit",
316 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_inquiry",
317 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_1st_visit",
318 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_1st_inquiry",
319 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_visit",
320 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_inquiry",
321 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_1st_visit",
322 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_1st_inquiry"
323 INTO cshibe.linceduenroll
324FROM advertiser_visits a
325 LEFT OUTER JOIN publisher_visits p ON p.did = a.did AND
326 p.visit_timestamp < a.visit_timestamp AND
327 p.next_timestamp >= a.visit_timestamp AND
328 a.direct_referrer = 0
329 LEFT OUTER JOIN (
330 SELECT url_domain, provider, "direct"
331 FROM (
332 SELECT
333 url_domain,
334 provider,
335 "direct",
336 weighted_pct = MAX(weighted_pct) OVER(PARTITION BY url_domain) is_url_provider
337 FROM (
338 SELECT
339 url_domain,
340 provider,
341 (accounts.entity_code = provider_code) :: SMALLINT "direct",
342 ((accounts.entity_code = provider_code) :: SMALLINT)
343 + SUM(1.0) / SUM(COUNT(*)) OVER(PARTITION BY url_domain)
344 + (1.0 / STRTOL(CRC32(provider), 16)) weighted_pct
345 FROM audits
346 INNER JOIN accounts ON accounts.code = audits.creator_account_code
347 WHERE account = (SELECT account FROM parameters) AND
348 audit_timestamp >= (SELECT start_date - INTERVAL '30 DAY' FROM parameters) AND
349 audit_timestamp < (SELECT end_date FROM parameters) AND
350 call_center IS NULL AND
351 type = 1 AND
352 authentic AND
353 COALESCE(url_domain, '') <> '' AND
354 provider IS NOT NULL AND
355 creator_account_code IS NOT NULL
356 GROUP BY url_domain, provider, accounts.entity_code, provider_code, "direct"
357 )
358 GROUP BY
359 url_domain,
360 provider,
361 "direct",
362 weighted_pct
363 )
364 WHERE is_url_provider
365 ) url_provider ON url_provider.url_domain = p.url_domain
366GROUP BY 1, 2;
367
368
369
370--A modification was made in early Sept to add month to the merge criteria to the part of the query that we "Identify any advertiser visit where the referrer was a 3rd party source URL"
371--This was to solve the issue of how a new url getting audited could cause historical lead sources to change from being a non-direct to a direct referrer
372
373DROP TABLE IF EXISTS parameters;
374
375-- Table to hold parameters
376-- Note: passing NULL for advertiser_url_base_domain means we will not filter for a specific advertiser URL base domain)
377CREATE TEMPORARY TABLE IF NOT EXISTS parameters(
378 account VARCHAR(MAX),
379 start_date TIMESTAMP,
380 end_date TIMESTAMP,
381 advertiser_url_base_domain VARCHAR(512)
382);
383
384PREPARE insert_parameters(CHAR, TIMESTAMP, TIMESTAMP, CHAR) AS
385INSERT INTO parameters(account, start_date, end_date, advertiser_url_base_domain)
386VALUES($1, $2, $3, $4);
387
388EXECUTE insert_parameters('Lincoln Technical Institute, Inc.', '2018-05-01', '2018-09-01', 'lincolntech.edu');
389
390DEALLOCATE insert_parameters;
391
392DROP TABLE IF EXISTS advertiser_visits;
393
394-- Get advertiser visits
395-- Execution time: 3m 6s
396SELECT
397 l.token,
398 d.uuid did,
399 (inquiry.token IS NOT NULL) :: SMALLINT inquiry,
400 TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND' visit_timestamp,
401 (l.token = FIRST_VALUE(l.token) OVER(PARTITION BY d.uuid ORDER BY l.created ROWS UNBOUNDED PRECEDING)) :: SMALLINT first_visit,
402 ISNULL(((inquiry.token = FIRST_VALUE(l.token) OVER(PARTITION BY CASE WHEN inquiry.token IS NOT NULL THEN d.uuid ELSE NULL END ORDER BY l.created ROWS UNBOUNDED PRECEDING)) :: SMALLINT), 0) first_inquiry,
403 (direct_referrer.url_domain IS NOT NULL) :: SMALLINT direct_referrer,
404 ad.attribute_07 as enroll
405INTO TEMP advertiser_visits
406FROM all_leads l
407 INNER JOIN all_urls u ON u.token = l.token
408 LEFT OUTER JOIN audits_deduped ad ON ad.token = l.token AND ad.account_code = l.account_code
409 LEFT OUTER JOIN deviceid d ON d.token = l.token AND d.methods IN (1, 3)
410 LEFT OUTER JOIN (
411 SELECT token
412 FROM consumer_frequency
413 WHERE created >= EXTRACT(EPOCH FROM (SELECT start_date FROM parameters)) AND
414 created < EXTRACT(EPOCH FROM (SELECT end_date FROM parameters))
415 GROUP BY token
416 ) inquiry ON inquiry.token = l.token
417
418 -- Identify any advertiser visit where the referrer was a 3rd party source URL
419 LEFT OUTER JOIN (
420 SELECT REGEXP_SUBSTR(url_domain, '([^.]+)\.[^.]+$') url_domain, date_trunc('month', lead_created_timestamp) created_month
421 FROM audits
422 WHERE account = (SELECT account FROM parameters) AND
423 -- audit_timestamp_day >= DATEADD(MONTH, -1, (SELECT start_date FROM parameters)) AND
424 audit_timestamp >= (SELECT start_date - INTERVAL '30 DAY' FROM parameters) AND
425 audit_timestamp < (SELECT end_date FROM parameters) AND
426 call_center IS NULL AND
427 type = 1 AND
428 authentic AND
429 COALESCE(url_domain, '') <> '' AND
430 provider IS NOT NULL AND
431 creator_account_code IS NOT NULL
432 GROUP BY 1, 2
433 ) direct_referrer ON direct_referrer.url_domain = REGEXP_SUBSTR(u.referrer_host, '([^.]+)\.[^.]+$') and direct_referrer.created_month = date_trunc('month', TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND')
434WHERE l.account_code = (SELECT code FROM accounts WHERE name = (SELECT account FROM parameters)) AND
435 l.created >= EXTRACT(EPOCH FROM (SELECT start_date FROM parameters)) AND
436 l.created < EXTRACT(EPOCH FROM (SELECT end_date FROM parameters)) AND
437 REGEXP_SUBSTR(SPLIT_PART(u.landing_page_url, '/', 1), '([^.]+)\.[^.]+$') = COALESCE((SELECT advertiser_url_base_domain FROM parameters), REGEXP_SUBSTR(SPLIT_PART(u.landing_page_url, '/', 1), '([^.]+)\.[^.]+$'));
438
439-- Get publisher visits
440-- Execution time: 4m 2s
441DROP TABLE IF EXISTS publisher_visits;
442
443SELECT
444 l.token,
445 u.landing_page_url url_domain,
446 d.uuid did,
447 TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND' visit_timestamp,
448 COALESCE(
449 LAG(TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND', 1)
450 OVER(PARTITION BY did ORDER BY VISIT_TIMESTAMP DESC),
451 TIMESTAMP '3000-01-01'
452 ) next_timestamp
453INTO TEMP publisher_visits
454FROM all_leads l
455 INNER JOIN deviceid d ON d.token = l.token
456 INNER JOIN all_urls u ON u.token = l.token
457WHERE l.account_code <> (SELECT code FROM accounts WHERE name = (SELECT account FROM parameters)) AND
458 l.created >= EXTRACT(EPOCH FROM (SELECT start_date - INTERVAL '30 DAY' FROM parameters)) AND
459 l.created < EXTRACT(EPOCH FROM (SELECT end_date FROM parameters)) AND
460 d.methods IN (1, 3) AND
461 u.landing_page_url IN(
462 SELECT url_domain
463 FROM audits
464 WHERE account = (SELECT account FROM parameters) AND
465 audit_timestamp >= (SELECT start_date - INTERVAL '30 DAY' FROM parameters) AND
466 audit_timestamp < (SELECT end_date FROM parameters) AND
467 call_center IS NULL AND
468 type = 1 AND
469 authentic AND
470 COALESCE(url_domain, '') <> '' AND
471 provider IS NOT NULL AND
472 creator_account_code IS NOT NULL
473 GROUP BY 1
474 );
475
476-- Produce attribution report
477-- Execution time: 16m 23s
478SELECT
479 COALESCE(url_provider.provider, 'None') provider,
480 CASE WHEN "direct" = 1 THEN 'Direct' WHEN "direct" = 0 THEN 'Aggregated' ELSE 'N/A' END direct_vs_agg,
481 COUNT(*) visits,
482 SUM(inquiry) inquiries,
483 SUM(enroll) as enrolls,
484 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_visit",
485 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_inquiry",
486 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_1st_visit",
487 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_1st_inquiry",
488 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_visit",
489 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_inquiry",
490 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_1st_visit",
491 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_1st_inquiry",
492 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_visit",
493 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_inquiry",
494 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_1st_visit",
495 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_1st_inquiry",
496 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_visit",
497 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_inquiry",
498 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_1st_visit",
499 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_1st_inquiry",
500 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_visit",
501 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_inquiry",
502 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_1st_visit",
503 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_1st_inquiry"
504 INTO cshibe.lincolntechenroll
505FROM advertiser_visits a
506 LEFT OUTER JOIN publisher_visits p ON p.did = a.did AND
507 p.visit_timestamp < a.visit_timestamp AND
508 p.next_timestamp >= a.visit_timestamp AND
509 a.direct_referrer = 0
510 LEFT OUTER JOIN (
511 SELECT url_domain, provider, "direct"
512 FROM (
513 SELECT
514 url_domain,
515 provider,
516 "direct",
517 weighted_pct = MAX(weighted_pct) OVER(PARTITION BY url_domain) is_url_provider
518 FROM (
519 SELECT
520 url_domain,
521 provider,
522 (accounts.entity_code = provider_code) :: SMALLINT "direct",
523 ((accounts.entity_code = provider_code) :: SMALLINT)
524 + SUM(1.0) / SUM(COUNT(*)) OVER(PARTITION BY url_domain)
525 + (1.0 / STRTOL(CRC32(provider), 16)) weighted_pct
526 FROM audits
527 INNER JOIN accounts ON accounts.code = audits.creator_account_code
528 WHERE account = (SELECT account FROM parameters) AND
529 audit_timestamp >= (SELECT start_date - INTERVAL '30 DAY' FROM parameters) AND
530 audit_timestamp < (SELECT end_date FROM parameters) AND
531 call_center IS NULL AND
532 type = 1 AND
533 authentic AND
534 COALESCE(url_domain, '') <> '' AND
535 provider IS NOT NULL AND
536 creator_account_code IS NOT NULL
537 GROUP BY url_domain, provider, accounts.entity_code, provider_code, "direct"
538 )
539 GROUP BY
540 url_domain,
541 provider,
542 "direct",
543 weighted_pct
544 )
545 WHERE is_url_provider
546 ) url_provider ON url_provider.url_domain = p.url_domain
547GROUP BY 1, 2;
548
549
550--A modification was made in early Sept to add month to the merge criteria to the part of the query that we "Identify any advertiser visit where the referrer was a 3rd party source URL"
551--This was to solve the issue of how a new url getting audited could cause historical lead sources to change from being a non-direct to a direct referrer
552
553DROP TABLE IF EXISTS parameters;
554
555-- Table to hold parameters
556-- Note: passing NULL for advertiser_url_base_domain means we will not filter for a specific advertiser URL base domain)
557CREATE TEMPORARY TABLE IF NOT EXISTS parameters(
558 account VARCHAR(MAX),
559 start_date TIMESTAMP,
560 end_date TIMESTAMP,
561 advertiser_url_base_domain VARCHAR(512)
562);
563
564PREPARE insert_parameters(CHAR, TIMESTAMP, TIMESTAMP, CHAR) AS
565INSERT INTO parameters(account, start_date, end_date, advertiser_url_base_domain)
566VALUES($1, $2, $3, $4);
567
568EXECUTE insert_parameters('Lincoln Technical Institute, Inc.', '2018-05-01', '2018-09-01', 'lincolntech-usa.com');
569
570DEALLOCATE insert_parameters;
571
572DROP TABLE IF EXISTS advertiser_visits;
573
574-- Get advertiser visits
575-- Execution time: 3m 6s
576SELECT
577 l.token,
578 d.uuid did,
579 (inquiry.token IS NOT NULL) :: SMALLINT inquiry,
580 TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND' visit_timestamp,
581 (l.token = FIRST_VALUE(l.token) OVER(PARTITION BY d.uuid ORDER BY l.created ROWS UNBOUNDED PRECEDING)) :: SMALLINT first_visit,
582 ISNULL(((inquiry.token = FIRST_VALUE(l.token) OVER(PARTITION BY CASE WHEN inquiry.token IS NOT NULL THEN d.uuid ELSE NULL END ORDER BY l.created ROWS UNBOUNDED PRECEDING)) :: SMALLINT), 0) first_inquiry,
583 (direct_referrer.url_domain IS NOT NULL) :: SMALLINT direct_referrer,
584 ad.attribute_07 as enroll
585INTO TEMP advertiser_visits
586FROM all_leads l
587 INNER JOIN all_urls u ON u.token = l.token
588 LEFT OUTER JOIN audits_deduped ad ON ad.token = l.token AND ad.account_code = l.account_code
589 LEFT OUTER JOIN deviceid d ON d.token = l.token AND d.methods IN (1, 3)
590 LEFT OUTER JOIN (
591 SELECT token
592 FROM consumer_frequency
593 WHERE created >= EXTRACT(EPOCH FROM (SELECT start_date FROM parameters)) AND
594 created < EXTRACT(EPOCH FROM (SELECT end_date FROM parameters))
595 GROUP BY token
596 ) inquiry ON inquiry.token = l.token
597
598 -- Identify any advertiser visit where the referrer was a 3rd party source URL
599 LEFT OUTER JOIN (
600 SELECT REGEXP_SUBSTR(url_domain, '([^.]+)\.[^.]+$') url_domain, date_trunc('month', lead_created_timestamp) created_month
601 FROM audits
602 WHERE account = (SELECT account FROM parameters) AND
603 -- audit_timestamp_day >= DATEADD(MONTH, -1, (SELECT start_date FROM parameters)) AND
604 audit_timestamp >= (SELECT start_date - INTERVAL '30 DAY' FROM parameters) AND
605 audit_timestamp < (SELECT end_date FROM parameters) AND
606 call_center IS NULL AND
607 type = 1 AND
608 authentic AND
609 COALESCE(url_domain, '') <> '' AND
610 provider IS NOT NULL AND
611 creator_account_code IS NOT NULL
612 GROUP BY 1, 2
613 ) direct_referrer ON direct_referrer.url_domain = REGEXP_SUBSTR(u.referrer_host, '([^.]+)\.[^.]+$') and direct_referrer.created_month = date_trunc('month', TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND')
614WHERE l.account_code = (SELECT code FROM accounts WHERE name = (SELECT account FROM parameters)) AND
615 l.created >= EXTRACT(EPOCH FROM (SELECT start_date FROM parameters)) AND
616 l.created < EXTRACT(EPOCH FROM (SELECT end_date FROM parameters)) AND
617 REGEXP_SUBSTR(SPLIT_PART(u.landing_page_url, '/', 1), '([^.]+)\.[^.]+$') = COALESCE((SELECT advertiser_url_base_domain FROM parameters), REGEXP_SUBSTR(SPLIT_PART(u.landing_page_url, '/', 1), '([^.]+)\.[^.]+$'));
618
619-- Get publisher visits
620-- Execution time: 4m 2s
621DROP TABLE IF EXISTS publisher_visits;
622
623SELECT
624 l.token,
625 u.landing_page_url url_domain,
626 d.uuid did,
627 TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND' visit_timestamp,
628 COALESCE(
629 LAG(TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND', 1)
630 OVER(PARTITION BY did ORDER BY VISIT_TIMESTAMP DESC),
631 TIMESTAMP '3000-01-01'
632 ) next_timestamp
633INTO TEMP publisher_visits
634FROM all_leads l
635 INNER JOIN deviceid d ON d.token = l.token
636 INNER JOIN all_urls u ON u.token = l.token
637WHERE l.account_code <> (SELECT code FROM accounts WHERE name = (SELECT account FROM parameters)) AND
638 l.created >= EXTRACT(EPOCH FROM (SELECT start_date - INTERVAL '30 DAY' FROM parameters)) AND
639 l.created < EXTRACT(EPOCH FROM (SELECT end_date FROM parameters)) AND
640 d.methods IN (1, 3) AND
641 u.landing_page_url IN(
642 SELECT url_domain
643 FROM audits
644 WHERE account = (SELECT account FROM parameters) AND
645 audit_timestamp >= (SELECT start_date - INTERVAL '30 DAY' FROM parameters) AND
646 audit_timestamp < (SELECT end_date FROM parameters) AND
647 call_center IS NULL AND
648 type = 1 AND
649 authentic AND
650 COALESCE(url_domain, '') <> '' AND
651 provider IS NOT NULL AND
652 creator_account_code IS NOT NULL
653 GROUP BY 1
654 );
655
656-- Produce attribution report
657-- Execution time: 16m 23s
658SELECT
659 COALESCE(url_provider.provider, 'None') provider,
660 CASE WHEN "direct" = 1 THEN 'Direct' WHEN "direct" = 0 THEN 'Aggregated' ELSE 'N/A' END direct_vs_agg,
661 COUNT(*) visits,
662 SUM(inquiry) inquiries,
663 SUM(enroll) as enrolls,
664 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_visit",
665 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_inquiry",
666 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_1st_visit",
667 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_1st_inquiry",
668 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_visit",
669 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_inquiry",
670 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_1st_visit",
671 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_1st_inquiry",
672 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_visit",
673 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_inquiry",
674 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_1st_visit",
675 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_1st_inquiry",
676 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_visit",
677 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_inquiry",
678 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_1st_visit",
679 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_1st_inquiry",
680 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_visit",
681 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_inquiry",
682 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_1st_visit",
683 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_1st_inquiry"
684 INTO cshibe.lincolntech_usaenroll
685FROM advertiser_visits a
686 LEFT OUTER JOIN publisher_visits p ON p.did = a.did AND
687 p.visit_timestamp < a.visit_timestamp AND
688 p.next_timestamp >= a.visit_timestamp AND
689 a.direct_referrer = 0
690 LEFT OUTER JOIN (
691 SELECT url_domain, provider, "direct"
692 FROM (
693 SELECT
694 url_domain,
695 provider,
696 "direct",
697 weighted_pct = MAX(weighted_pct) OVER(PARTITION BY url_domain) is_url_provider
698 FROM (
699 SELECT
700 url_domain,
701 provider,
702 (accounts.entity_code = provider_code) :: SMALLINT "direct",
703 ((accounts.entity_code = provider_code) :: SMALLINT)
704 + SUM(1.0) / SUM(COUNT(*)) OVER(PARTITION BY url_domain)
705 + (1.0 / STRTOL(CRC32(provider), 16)) weighted_pct
706 FROM audits
707 INNER JOIN accounts ON accounts.code = audits.creator_account_code
708 WHERE account = (SELECT account FROM parameters) AND
709 audit_timestamp >= (SELECT start_date - INTERVAL '30 DAY' FROM parameters) AND
710 audit_timestamp < (SELECT end_date FROM parameters) AND
711 call_center IS NULL AND
712 type = 1 AND
713 authentic AND
714 COALESCE(url_domain, '') <> '' AND
715 provider IS NOT NULL AND
716 creator_account_code IS NOT NULL
717 GROUP BY url_domain, provider, accounts.entity_code, provider_code, "direct"
718 )
719 GROUP BY
720 url_domain,
721 provider,
722 "direct",
723 weighted_pct
724 )
725 WHERE is_url_provider
726 ) url_provider ON url_provider.url_domain = p.url_domain
727GROUP BY 1, 2;
728
729
730--A modification was made in early Sept to add month to the merge criteria to the part of the query that we "Identify any advertiser visit where the referrer was a 3rd party source URL"
731--This was to solve the issue of how a new url getting audited could cause historical lead sources to change from being a non-direct to a direct referrer
732
733DROP TABLE IF EXISTS parameters;
734
735-- Table to hold parameters
736-- Note: passing NULL for advertiser_url_base_domain means we will not filter for a specific advertiser URL base domain)
737CREATE TEMPORARY TABLE IF NOT EXISTS parameters(
738 account VARCHAR(MAX),
739 start_date TIMESTAMP,
740 end_date TIMESTAMP,
741 advertiser_url_base_domain VARCHAR(512)
742);
743
744PREPARE insert_parameters(CHAR, TIMESTAMP, TIMESTAMP, CHAR) AS
745INSERT INTO parameters(account, start_date, end_date, advertiser_url_base_domain)
746VALUES($1, $2, $3, $4);
747
748EXECUTE insert_parameters('Lincoln Technical Institute, Inc.', '2018-05-01', '2018-09-01', 'lincolnculinaryinstitute.com');
749
750DEALLOCATE insert_parameters;
751
752DROP TABLE IF EXISTS advertiser_visits;
753
754-- Get advertiser visits
755-- Execution time: 3m 6s
756SELECT
757 l.token,
758 d.uuid did,
759 (inquiry.token IS NOT NULL) :: SMALLINT inquiry,
760 TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND' visit_timestamp,
761 (l.token = FIRST_VALUE(l.token) OVER(PARTITION BY d.uuid ORDER BY l.created ROWS UNBOUNDED PRECEDING)) :: SMALLINT first_visit,
762 ISNULL(((inquiry.token = FIRST_VALUE(l.token) OVER(PARTITION BY CASE WHEN inquiry.token IS NOT NULL THEN d.uuid ELSE NULL END ORDER BY l.created ROWS UNBOUNDED PRECEDING)) :: SMALLINT), 0) first_inquiry,
763 (direct_referrer.url_domain IS NOT NULL) :: SMALLINT direct_referrer,
764 ad.attribute_07 as enroll
765INTO TEMP advertiser_visits
766FROM all_leads l
767 INNER JOIN all_urls u ON u.token = l.token
768 LEFT OUTER JOIN audits_deduped ad ON ad.token = l.token AND ad.account_code = l.account_code
769 LEFT OUTER JOIN deviceid d ON d.token = l.token AND d.methods IN (1, 3)
770 LEFT OUTER JOIN (
771 SELECT token
772 FROM consumer_frequency
773 WHERE created >= EXTRACT(EPOCH FROM (SELECT start_date FROM parameters)) AND
774 created < EXTRACT(EPOCH FROM (SELECT end_date FROM parameters))
775 GROUP BY token
776 ) inquiry ON inquiry.token = l.token
777
778 -- Identify any advertiser visit where the referrer was a 3rd party source URL
779 LEFT OUTER JOIN (
780 SELECT REGEXP_SUBSTR(url_domain, '([^.]+)\.[^.]+$') url_domain, date_trunc('month', lead_created_timestamp) created_month
781 FROM audits
782 WHERE account = (SELECT account FROM parameters) AND
783 -- audit_timestamp_day >= DATEADD(MONTH, -1, (SELECT start_date FROM parameters)) AND
784 audit_timestamp >= (SELECT start_date - INTERVAL '30 DAY' FROM parameters) AND
785 audit_timestamp < (SELECT end_date FROM parameters) AND
786 call_center IS NULL AND
787 type = 1 AND
788 authentic AND
789 COALESCE(url_domain, '') <> '' AND
790 provider IS NOT NULL AND
791 creator_account_code IS NOT NULL
792 GROUP BY 1, 2
793 ) direct_referrer ON direct_referrer.url_domain = REGEXP_SUBSTR(u.referrer_host, '([^.]+)\.[^.]+$') and direct_referrer.created_month = date_trunc('month', TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND')
794WHERE l.account_code = (SELECT code FROM accounts WHERE name = (SELECT account FROM parameters)) AND
795 l.created >= EXTRACT(EPOCH FROM (SELECT start_date FROM parameters)) AND
796 l.created < EXTRACT(EPOCH FROM (SELECT end_date FROM parameters)) AND
797 REGEXP_SUBSTR(SPLIT_PART(u.landing_page_url, '/', 1), '([^.]+)\.[^.]+$') = COALESCE((SELECT advertiser_url_base_domain FROM parameters), REGEXP_SUBSTR(SPLIT_PART(u.landing_page_url, '/', 1), '([^.]+)\.[^.]+$'));
798
799-- Get publisher visits
800-- Execution time: 4m 2s
801DROP TABLE IF EXISTS publisher_visits;
802
803SELECT
804 l.token,
805 u.landing_page_url url_domain,
806 d.uuid did,
807 TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND' visit_timestamp,
808 COALESCE(
809 LAG(TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND', 1)
810 OVER(PARTITION BY did ORDER BY VISIT_TIMESTAMP DESC),
811 TIMESTAMP '3000-01-01'
812 ) next_timestamp
813INTO TEMP publisher_visits
814FROM all_leads l
815 INNER JOIN deviceid d ON d.token = l.token
816 INNER JOIN all_urls u ON u.token = l.token
817WHERE l.account_code <> (SELECT code FROM accounts WHERE name = (SELECT account FROM parameters)) AND
818 l.created >= EXTRACT(EPOCH FROM (SELECT start_date - INTERVAL '30 DAY' FROM parameters)) AND
819 l.created < EXTRACT(EPOCH FROM (SELECT end_date FROM parameters)) AND
820 d.methods IN (1, 3) AND
821 u.landing_page_url IN(
822 SELECT url_domain
823 FROM audits
824 WHERE account = (SELECT account FROM parameters) AND
825 audit_timestamp >= (SELECT start_date - INTERVAL '30 DAY' FROM parameters) AND
826 audit_timestamp < (SELECT end_date FROM parameters) AND
827 call_center IS NULL AND
828 type = 1 AND
829 authentic AND
830 COALESCE(url_domain, '') <> '' AND
831 provider IS NOT NULL AND
832 creator_account_code IS NOT NULL
833 GROUP BY 1
834 );
835
836-- Produce attribution report
837-- Execution time: 16m 23s
838SELECT
839 COALESCE(url_provider.provider, 'None') provider,
840 CASE WHEN "direct" = 1 THEN 'Direct' WHEN "direct" = 0 THEN 'Aggregated' ELSE 'N/A' END direct_vs_agg,
841 COUNT(*) visits,
842 SUM(inquiry) inquiries,
843 SUM(enroll) as enrolls,
844 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_visit",
845 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_inquiry",
846 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_1st_visit",
847 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_1st_inquiry",
848 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_visit",
849 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_inquiry",
850 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_1st_visit",
851 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_1st_inquiry",
852 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_visit",
853 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_inquiry",
854 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_1st_visit",
855 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_1st_inquiry",
856 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_visit",
857 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_inquiry",
858 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_1st_visit",
859 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_1st_inquiry",
860 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_visit",
861 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_inquiry",
862 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_1st_visit",
863 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_1st_inquiry"
864 INTO cshibe.lincolnculinaryinstituteenroll
865FROM advertiser_visits a
866 LEFT OUTER JOIN publisher_visits p ON p.did = a.did AND
867 p.visit_timestamp < a.visit_timestamp AND
868 p.next_timestamp >= a.visit_timestamp AND
869 a.direct_referrer = 0
870 LEFT OUTER JOIN (
871 SELECT url_domain, provider, "direct"
872 FROM (
873 SELECT
874 url_domain,
875 provider,
876 "direct",
877 weighted_pct = MAX(weighted_pct) OVER(PARTITION BY url_domain) is_url_provider
878 FROM (
879 SELECT
880 url_domain,
881 provider,
882 (accounts.entity_code = provider_code) :: SMALLINT "direct",
883 ((accounts.entity_code = provider_code) :: SMALLINT)
884 + SUM(1.0) / SUM(COUNT(*)) OVER(PARTITION BY url_domain)
885 + (1.0 / STRTOL(CRC32(provider), 16)) weighted_pct
886 FROM audits
887 INNER JOIN accounts ON accounts.code = audits.creator_account_code
888 WHERE account = (SELECT account FROM parameters) AND
889 audit_timestamp >= (SELECT start_date - INTERVAL '30 DAY' FROM parameters) AND
890 audit_timestamp < (SELECT end_date FROM parameters) AND
891 call_center IS NULL AND
892 type = 1 AND
893 authentic AND
894 COALESCE(url_domain, '') <> '' AND
895 provider IS NOT NULL AND
896 creator_account_code IS NOT NULL
897 GROUP BY url_domain, provider, accounts.entity_code, provider_code, "direct"
898 )
899 GROUP BY
900 url_domain,
901 provider,
902 "direct",
903 weighted_pct
904 )
905 WHERE is_url_provider
906 ) url_provider ON url_provider.url_domain = p.url_domain
907GROUP BY 1, 2;
908
909
910
911--A modification was made in early Sept to add month to the merge criteria to the part of the query that we "Identify any advertiser visit where the referrer was a 3rd party source URL"
912--This was to solve the issue of how a new url getting audited could cause historical lead sources to change from being a non-direct to a direct referrer
913
914DROP TABLE IF EXISTS parameters;
915
916-- Table to hold parameters
917-- Note: passing NULL for advertiser_url_base_domain means we will not filter for a specific advertiser URL base domain)
918CREATE TEMPORARY TABLE IF NOT EXISTS parameters(
919 account VARCHAR(MAX),
920 start_date TIMESTAMP,
921 end_date TIMESTAMP,
922 advertiser_url_base_domain VARCHAR(512)
923);
924
925PREPARE insert_parameters(CHAR, TIMESTAMP, TIMESTAMP, CHAR) AS
926INSERT INTO parameters(account, start_date, end_date, advertiser_url_base_domain)
927VALUES($1, $2, $3, $4);
928
929EXECUTE insert_parameters('Lincoln Technical Institute, Inc.', '2018-05-01', '2018-09-01', 'euphoriainstitute.com');
930
931DEALLOCATE insert_parameters;
932
933DROP TABLE IF EXISTS advertiser_visits;
934
935-- Get advertiser visits
936-- Execution time: 3m 6s
937SELECT
938 l.token,
939 d.uuid did,
940 (inquiry.token IS NOT NULL) :: SMALLINT inquiry,
941 TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND' visit_timestamp,
942 (l.token = FIRST_VALUE(l.token) OVER(PARTITION BY d.uuid ORDER BY l.created ROWS UNBOUNDED PRECEDING)) :: SMALLINT first_visit,
943 ISNULL(((inquiry.token = FIRST_VALUE(l.token) OVER(PARTITION BY CASE WHEN inquiry.token IS NOT NULL THEN d.uuid ELSE NULL END ORDER BY l.created ROWS UNBOUNDED PRECEDING)) :: SMALLINT), 0) first_inquiry,
944 (direct_referrer.url_domain IS NOT NULL) :: SMALLINT direct_referrer,
945 ad.attribute_07 as enroll
946INTO TEMP advertiser_visits
947FROM all_leads l
948 INNER JOIN all_urls u ON u.token = l.token
949 LEFT OUTER JOIN audits_deduped ad ON ad.token = l.token AND ad.account_code = l.account_code
950 LEFT OUTER JOIN deviceid d ON d.token = l.token AND d.methods IN (1, 3)
951 LEFT OUTER JOIN (
952 SELECT token
953 FROM consumer_frequency
954 WHERE created >= EXTRACT(EPOCH FROM (SELECT start_date FROM parameters)) AND
955 created < EXTRACT(EPOCH FROM (SELECT end_date FROM parameters))
956 GROUP BY token
957 ) inquiry ON inquiry.token = l.token
958
959 -- Identify any advertiser visit where the referrer was a 3rd party source URL
960 LEFT OUTER JOIN (
961 SELECT REGEXP_SUBSTR(url_domain, '([^.]+)\.[^.]+$') url_domain, date_trunc('month', lead_created_timestamp) created_month
962 FROM audits
963 WHERE account = (SELECT account FROM parameters) AND
964 -- audit_timestamp_day >= DATEADD(MONTH, -1, (SELECT start_date FROM parameters)) AND
965 audit_timestamp >= (SELECT start_date - INTERVAL '30 DAY' FROM parameters) AND
966 audit_timestamp < (SELECT end_date FROM parameters) AND
967 call_center IS NULL AND
968 type = 1 AND
969 authentic AND
970 COALESCE(url_domain, '') <> '' AND
971 provider IS NOT NULL AND
972 creator_account_code IS NOT NULL
973 GROUP BY 1, 2
974 ) direct_referrer ON direct_referrer.url_domain = REGEXP_SUBSTR(u.referrer_host, '([^.]+)\.[^.]+$') and direct_referrer.created_month = date_trunc('month', TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND')
975WHERE l.account_code = (SELECT code FROM accounts WHERE name = (SELECT account FROM parameters)) AND
976 l.created >= EXTRACT(EPOCH FROM (SELECT start_date FROM parameters)) AND
977 l.created < EXTRACT(EPOCH FROM (SELECT end_date FROM parameters)) AND
978 REGEXP_SUBSTR(SPLIT_PART(u.landing_page_url, '/', 1), '([^.]+)\.[^.]+$') = COALESCE((SELECT advertiser_url_base_domain FROM parameters), REGEXP_SUBSTR(SPLIT_PART(u.landing_page_url, '/', 1), '([^.]+)\.[^.]+$'));
979
980-- Get publisher visits
981-- Execution time: 4m 2s
982DROP TABLE IF EXISTS publisher_visits;
983
984SELECT
985 l.token,
986 u.landing_page_url url_domain,
987 d.uuid did,
988 TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND' visit_timestamp,
989 COALESCE(
990 LAG(TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND', 1)
991 OVER(PARTITION BY did ORDER BY VISIT_TIMESTAMP DESC),
992 TIMESTAMP '3000-01-01'
993 ) next_timestamp
994INTO TEMP publisher_visits
995FROM all_leads l
996 INNER JOIN deviceid d ON d.token = l.token
997 INNER JOIN all_urls u ON u.token = l.token
998WHERE l.account_code <> (SELECT code FROM accounts WHERE name = (SELECT account FROM parameters)) AND
999 l.created >= EXTRACT(EPOCH FROM (SELECT start_date - INTERVAL '30 DAY' FROM parameters)) AND
1000 l.created < EXTRACT(EPOCH FROM (SELECT end_date FROM parameters)) AND
1001 d.methods IN (1, 3) AND
1002 u.landing_page_url IN(
1003 SELECT url_domain
1004 FROM audits
1005 WHERE account = (SELECT account FROM parameters) AND
1006 audit_timestamp >= (SELECT start_date - INTERVAL '30 DAY' FROM parameters) AND
1007 audit_timestamp < (SELECT end_date FROM parameters) AND
1008 call_center IS NULL AND
1009 type = 1 AND
1010 authentic AND
1011 COALESCE(url_domain, '') <> '' AND
1012 provider IS NOT NULL AND
1013 creator_account_code IS NOT NULL
1014 GROUP BY 1
1015 );
1016
1017-- Produce attribution report
1018-- Execution time: 16m 23s
1019SELECT
1020 COALESCE(url_provider.provider, 'None') provider,
1021 CASE WHEN "direct" = 1 THEN 'Direct' WHEN "direct" = 0 THEN 'Aggregated' ELSE 'N/A' END direct_vs_agg,
1022 COUNT(*) visits,
1023 SUM(inquiry) inquiries,
1024 SUM(enroll) as enrolls,
1025 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_visit",
1026 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_inquiry",
1027 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_1st_visit",
1028 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_1st_inquiry",
1029 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_visit",
1030 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_inquiry",
1031 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_1st_visit",
1032 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_1st_inquiry",
1033 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_visit",
1034 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_inquiry",
1035 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_1st_visit",
1036 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_1st_inquiry",
1037 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_visit",
1038 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_inquiry",
1039 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_1st_visit",
1040 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_1st_inquiry",
1041 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_visit",
1042 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_inquiry",
1043 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_1st_visit",
1044 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_1st_inquiry"
1045 INTO cshibe.euphoriainstitute_enroll
1046FROM advertiser_visits a
1047 LEFT OUTER JOIN publisher_visits p ON p.did = a.did AND
1048 p.visit_timestamp < a.visit_timestamp AND
1049 p.next_timestamp >= a.visit_timestamp AND
1050 a.direct_referrer = 0
1051 LEFT OUTER JOIN (
1052 SELECT url_domain, provider, "direct"
1053 FROM (
1054 SELECT
1055 url_domain,
1056 provider,
1057 "direct",
1058 weighted_pct = MAX(weighted_pct) OVER(PARTITION BY url_domain) is_url_provider
1059 FROM (
1060 SELECT
1061 url_domain,
1062 provider,
1063 (accounts.entity_code = provider_code) :: SMALLINT "direct",
1064 ((accounts.entity_code = provider_code) :: SMALLINT)
1065 + SUM(1.0) / SUM(COUNT(*)) OVER(PARTITION BY url_domain)
1066 + (1.0 / STRTOL(CRC32(provider), 16)) weighted_pct
1067 FROM audits
1068 INNER JOIN accounts ON accounts.code = audits.creator_account_code
1069 WHERE account = (SELECT account FROM parameters) AND
1070 audit_timestamp >= (SELECT start_date - INTERVAL '30 DAY' FROM parameters) AND
1071 audit_timestamp < (SELECT end_date FROM parameters) AND
1072 call_center IS NULL AND
1073 type = 1 AND
1074 authentic AND
1075 COALESCE(url_domain, '') <> '' AND
1076 provider IS NOT NULL AND
1077 creator_account_code IS NOT NULL
1078 GROUP BY url_domain, provider, accounts.entity_code, provider_code, "direct"
1079 )
1080 GROUP BY
1081 url_domain,
1082 provider,
1083 "direct",
1084 weighted_pct
1085 )
1086 WHERE is_url_provider
1087 ) url_provider ON url_provider.url_domain = p.url_domain
1088GROUP BY 1, 2;
1089
1090
1091--A modification was made in early Sept to add month to the merge criteria to the part of the query that we "Identify any advertiser visit where the referrer was a 3rd party source URL"
1092--This was to solve the issue of how a new url getting audited could cause historical lead sources to change from being a non-direct to a direct referrer
1093
1094
1095DROP TABLE IF EXISTS parameters;
1096
1097-- Table to hold parameters
1098CREATE TEMPORARY TABLE IF NOT EXISTS parameters(
1099 account VARCHAR(MAX),
1100 start_date TIMESTAMP,
1101 end_date TIMESTAMP
1102);
1103
1104PREPARE insert_parameters(CHAR, TIMESTAMP, TIMESTAMP) AS
1105INSERT INTO parameters(account, start_date, end_date)
1106VALUES($1, $2, $3);
1107
1108EXECUTE insert_parameters('Lincoln Technical Institute, Inc.', '2018-05-01', '2018-09-01');
1109DEALLOCATE insert_parameters;
1110
1111
1112DROP TABLE IF EXISTS advertiser_visits;
1113
1114-- Get advertiser visits
1115-- Execution time: 3m 6s
1116SELECT
1117 l.token,
1118 d.uuid did,
1119 (inquiry.token IS NOT NULL) :: SMALLINT inquiry,
1120 TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND' visit_timestamp,
1121 (l.token = FIRST_VALUE(l.token) OVER(PARTITION BY d.uuid ORDER BY l.created ROWS UNBOUNDED PRECEDING)) :: SMALLINT first_visit,
1122 ISNULL(((inquiry.token = FIRST_VALUE(l.token) OVER(PARTITION BY CASE WHEN inquiry.token IS NOT NULL THEN d.uuid ELSE NULL END ORDER BY l.created ROWS UNBOUNDED PRECEDING)) :: SMALLINT), 0) first_inquiry,
1123 (direct_referrer.url_domain IS NOT NULL) :: SMALLINT direct_referrer,
1124 ad.attribute_07 as enroll
1125INTO TEMP advertiser_visits
1126FROM all_leads l
1127 LEFT OUTER JOIN audits_deduped ad ON ad.token = l.token AND ad.account_code = l.account_code
1128 LEFT OUTER JOIN deviceid d ON d.token = l.token AND d.methods IN (1, 3)
1129 LEFT OUTER JOIN all_urls u ON u.token = l.token
1130 LEFT OUTER JOIN (
1131 SELECT token
1132 FROM consumer_frequency
1133 WHERE created >= EXTRACT(EPOCH FROM (SELECT start_date FROM parameters)) AND
1134 created < EXTRACT(EPOCH FROM (SELECT end_date FROM parameters))
1135 GROUP BY token
1136 ) inquiry ON inquiry.token = l.token
1137
1138 -- Identify any advertiser visit where the referrer was a 3rd party source URL
1139 LEFT OUTER JOIN (
1140 SELECT REGEXP_SUBSTR(url_domain, '([^.]+)\.[^.]+$') url_domain, date_trunc('month', lead_created_timestamp) created_month
1141 FROM audits
1142 WHERE account = (SELECT account FROM parameters) AND
1143 -- audit_timestamp_day >= DATEADD(MONTH, -1, (SELECT start_date FROM parameters)) AND
1144 audit_timestamp >= (SELECT start_date - INTERVAL '30 DAY' FROM parameters) AND
1145 audit_timestamp < (SELECT end_date FROM parameters) AND
1146 call_center IS NULL AND
1147 type = 1 AND
1148 authentic AND
1149 COALESCE(url_domain, '') <> '' AND
1150 provider IS NOT NULL AND
1151 creator_account_code IS NOT NULL
1152 GROUP BY 1, 2
1153 ) direct_referrer ON direct_referrer.url_domain = REGEXP_SUBSTR(u.referrer_host, '([^.]+)\.[^.]+$') and direct_referrer.created_month = date_trunc('month', TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND')
1154WHERE l.account_code = (SELECT code FROM accounts WHERE name = (SELECT account FROM parameters)) AND
1155 l.created >= EXTRACT(EPOCH FROM (SELECT start_date FROM parameters)) AND
1156 l.created < EXTRACT(EPOCH FROM (SELECT end_date FROM parameters));
1157
1158-- Get publisher visits
1159-- Execution time: 4m 2s
1160DROP TABLE IF EXISTS publisher_visits;
1161
1162SELECT
1163 l.token,
1164 u.landing_page_url url_domain,
1165 d.uuid did,
1166 TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND' visit_timestamp,
1167 COALESCE(
1168 LAG(TIMESTAMP 'EPOCH' + l.created * INTERVAL '1 SECOND', 1)
1169 OVER(PARTITION BY did ORDER BY VISIT_TIMESTAMP DESC),
1170 TIMESTAMP '3000-01-01'
1171 ) next_timestamp
1172INTO TEMP publisher_visits
1173FROM all_leads l
1174 INNER JOIN deviceid d ON d.token = l.token
1175 INNER JOIN all_urls u ON u.token = l.token
1176WHERE l.account_code <> (SELECT code FROM accounts WHERE name = (SELECT account FROM parameters)) AND
1177 l.created >= EXTRACT(EPOCH FROM (SELECT start_date - INTERVAL '30 DAY' FROM parameters)) AND
1178 l.created < EXTRACT(EPOCH FROM (SELECT end_date FROM parameters)) AND
1179 d.methods IN (1, 3) AND
1180 u.landing_page_url IN(
1181 SELECT url_domain
1182 FROM audits
1183 WHERE account = (SELECT account FROM parameters) AND
1184 audit_timestamp >= (SELECT start_date - INTERVAL '30 DAY' FROM parameters) AND
1185 audit_timestamp < (SELECT end_date FROM parameters) AND
1186 call_center IS NULL AND
1187 type = 1 AND
1188 authentic AND
1189 COALESCE(url_domain, '') <> '' AND
1190 provider IS NOT NULL AND
1191 creator_account_code IS NOT NULL
1192 GROUP BY 1
1193 );
1194
1195-- Product attribution report
1196-- Execution time: 16m 23s
1197SELECT
1198 COALESCE(url_provider.provider, 'None') provider,
1199 CASE WHEN "direct" = 1 THEN 'Direct' WHEN "direct" = 0 THEN 'Aggregated' ELSE 'N/A' END direct_vs_agg,
1200 COUNT(*) visits,
1201 SUM(inquiry) inquiries,
1202 SUM(enroll) as enrolls,
1203 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_visit",
1204 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_inquiry",
1205 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_1st_visit",
1206 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '30 DAY') :: SMALLINT), 0) "30_day_1st_inquiry",
1207 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_visit",
1208 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_inquiry",
1209 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_1st_visit",
1210 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 WEEK') :: SMALLINT), 0) "1_week_1st_inquiry",
1211 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_visit",
1212 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_inquiry",
1213 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_1st_visit",
1214 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 DAY') :: SMALLINT), 0) "1_day_1st_inquiry",
1215 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_visit",
1216 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_inquiry",
1217 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_1st_visit",
1218 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '1 HOUR') :: SMALLINT), 0) "1_hour_1st_inquiry",
1219 COALESCE(SUM((a.visit_timestamp - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_visit",
1220 COALESCE(SUM((CASE WHEN inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_inquiry",
1221 COALESCE(SUM((CASE WHEN first_visit :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_1st_visit",
1222 COALESCE(SUM((CASE WHEN first_inquiry :: BOOLEAN THEN a.visit_timestamp END - p.visit_timestamp < INTERVAL '5 MINUTES') :: SMALLINT), 0) "5_min_1st_inquiry"
1223 INTO cshibe.Overall_Enroll
1224FROM advertiser_visits a
1225 LEFT OUTER JOIN publisher_visits p ON p.did = a.did AND
1226 p.visit_timestamp < a.visit_timestamp AND
1227 p.next_timestamp >= a.visit_timestamp AND
1228 a.direct_referrer = 0
1229 LEFT OUTER JOIN (
1230 SELECT url_domain, provider, "direct"
1231 FROM (
1232 SELECT
1233 url_domain,
1234 provider,
1235 "direct",
1236 weighted_pct = MAX(weighted_pct) OVER(PARTITION BY url_domain) is_url_provider
1237 FROM (
1238 SELECT
1239 url_domain,
1240 provider,
1241 (accounts.entity_code = provider_code) :: SMALLINT "direct",
1242 ((accounts.entity_code = provider_code) :: SMALLINT)
1243 + SUM(1.0) / SUM(COUNT(*)) OVER(PARTITION BY url_domain)
1244 + (1.0 / STRTOL(CRC32(provider), 16)) weighted_pct
1245 FROM audits
1246 INNER JOIN accounts ON accounts.code = audits.creator_account_code
1247 WHERE account = (SELECT account FROM parameters) AND
1248 audit_timestamp >= (SELECT start_date - INTERVAL '30 DAY' FROM parameters) AND
1249 audit_timestamp < (SELECT end_date FROM parameters) AND
1250 call_center IS NULL AND
1251 type = 1 AND
1252 authentic AND
1253 COALESCE(url_domain, '') <> '' AND
1254 provider IS NOT NULL AND
1255 creator_account_code IS NOT NULL
1256 GROUP BY url_domain, provider, accounts.entity_code, provider_code, "direct"
1257 )
1258 GROUP BY
1259 url_domain,
1260 provider,
1261 "direct",
1262 weighted_pct
1263 )
1264 WHERE is_url_provider
1265 ) url_provider ON url_provider.url_domain = p.url_domain
1266GROUP BY 1, 2;
1267
1268
1269select * from euphoriainstitute_enroll;
1270select * from learnatlincolnenroll;
1271select * from linceduenroll;
1272select * from lincolntech_usaenroll;
1273select * from lincolntechenroll;
1274select * from lincolnculinaryinstituteenroll;
1275select * from overall_enroll;