· 7 years ago · Dec 09, 2018, 03:18 AM
1-- *************************************************************
2-- This script creates and populates report performance of our conversion funnel.
3-- I will be using MYSQL for this exercise.
4-- I normally use Python and bridge a connection to the database when doing ETL.
5-- *************************************************************
6
7-- create the database
8DROP DATABASE IF EXISTS doordash;
9CREATE DATABASE doordash;
10
11-- select the database
12USE doordash;
13
14-- create tables
15CREATE TABLE events
16(
17 id INT NOT NULL AUTO_INCREMENT.
18 event_time TIMESTAMP NOT NULL,
19 user_id VARCHAR(255) NOT NULL,
20 event_type VARCHAR(50) NOT NULL,
21 platform VARCHAR(50) NOT NULL,
22 country VARCHAR(50) NOT NULL,
23 region VARCHAR(50) NOT NULL,
24 device_id VARCHAR(255) NOT NULL,
25 initial_referring_domain VARCHAR(50),
26PRIMARY KEY (user_id)
27);
28
29-- insert rows into the tables from the csv file sent in the email
30-- the event_time was edited to fit the proper format
31LOAD DATA INFILE 'C:/Users/Root/Downloads/Business Intelligence Exercise.csv'
32INTO TABLE events
33FIELDS TERMINATED BY ',' ENCLOSED BY '"'
34LINES TERMINATED BY '\n'
35IGNORE 1 ROWS;
36
37use doordash;
38-- What I am doing here is counting the unique user_id's when a specific event takes place
39-- The logic follows the step number based on the funnel definition
40select
41 distinct(user_id),
42 min(event_time) as event_time,
43 count(distinct(case when event_type = 'home_page' then user_id end)) step1,
44 count(distinct(case when event_type = 'store_ordering_page' then user_id end)) step2,
45 count(distinct(case when event_type = 'checkout_page' then user_id end)) step3,
46 count(distinct(case when event_type = 'checkout_success' then user_id end)) step4
47from events
48group by user_id;
49
50-- We should include search as step 4 in our funnel based on the following assumptions.
51-- My assumption is that a user at the checkout page will see the total price and perhaps search the event before committing to the
52-- purchase. The event search could be due to price or actual research done on the event by the user.
53-- Also if we plot this on a bar chart we can see event counts in desc order and event_search having more records than checkout is a -- valid assumption.
54
55select
56 distinct(user_id),
57 min(event_time) as event_time,
58 count(distinct(case when event_type = 'home_page' then user_id end)) step1,
59 count(distinct(case when event_type = 'store_ordering_page' then user_id end)) step2,
60 count(distinct(case when event_type = 'checkout_page' then user_id end)) step3,
61 count(distinct(case when event_type = 'search_event' then user_id end)) step4,
62 count(distinct(case when event_type = 'checkout_success' then user_id end)) step5
63from events
64group by user_id;