· 7 years ago · Dec 09, 2018, 01:54 AM
1-- *************************************************************
2-- This script creates and populates report performance of our conversion funnel.
3-- I will be using MYSQL for this exercise.
4-- *************************************************************
5
6-- create the database
7DROP DATABASE IF EXISTS doordash;
8CREATE DATABASE doordash;
9
10-- select the database
11USE doordash;
12
13-- create tables
14CREATE TABLE events
15(
16 event_time TIMESTAMP NOT NULL,
17 user_id VARCHAR(255) NOT NULL,
18 event_type VARCHAR(50) NOT NULL,
19 platform VARCHAR(50) NOT NULL,
20 country VARCHAR(50) NOT NULL,
21 region VARCHAR(50) NOT NULL,
22 device_id VARCHAR(255) NOT NULL,
23 initial_referring_domain VARCHAR(50),
24PRIMARY KEY (user_id)
25);
26
27-- insert rows into the tables from the csv file sent in the email
28-- the event_time was edited to fit the proper format
29LOAD DATA INFILE 'C:/Users/Root/Downloads/Business Intelligence Exercise.csv'
30INTO TABLE events
31FIELDS TERMINATED BY ',' ENCLOSED BY '"'
32LINES TERMINATED BY '\n'
33IGNORE 1 ROWS;
34
35use doordash;
36-- What I am doing here is counting the unique user_id's when a specific event takes place
37-- The logic follows the step number based on the funnel definition
38select
39 distinct(user_id),
40 min(event_time) as event_time,
41 count(distinct(case when event_type = 'home_page' then user_id end)) step1,
42 count(distinct(case when event_type = 'store_ordering_page' then user_id end)) step2,
43 count(distinct(case when event_type = 'checkout_page' then user_id end)) step3,
44 count(distinct(case when event_type = 'checkout_success' then user_id end)) step4
45from events
46group by user_id;
47
48-- We should include search as step 4 in our funnel based on the following assumptions.
49-- My assumption is that a user at the checkout page will see the total price and perhaps search the event before committing to the
50-- purchase. The event search could be due to price or actual research done on the event by the user.
51-- Also if we plot this on a bar chart we can see event counts in desc order and event_search having more records than checkout is a -- valid assumption.
52
53select
54 distinct(user_id),
55 min(event_time) as event_time,
56 count(distinct(case when event_type = 'home_page' then user_id end)) step1,
57 count(distinct(case when event_type = 'store_ordering_page' then user_id end)) step2,
58 count(distinct(case when event_type = 'checkout_page' then user_id end)) step3,
59 count(distinct(case when event_type = 'search_event' then user_id end)) step4,
60 count(distinct(case when event_type = 'checkout_success' then user_id end)) step5
61from events
62group by user_id;