· 4 months ago · May 12, 2025, 07:05 AM
1spark.sql("""
2CREATE TABLE IF NOT EXISTS businessanalytics.mdsp_ssp_match_test (
3 ssp_day TIMESTAMP,
4 ssp_impression_id STRING,
5 ssp_banner_id BIGINT,
6 ssp_campaign_id BIGINT,
7 ssp_pad_id BIGINT,
8 ssp_block_id BIGINT,
9 ssp_referer STRING,
10 ssp_orig_pad_id BIGINT,
11 ssp_misc BIGINT,
12 ssp_visitor_uuid STRING,
13 ssp_rq_sess STRING,
14 ssp_orig_block_id BIGINT,
15 ssp_uuid_status STRING,
16 ssp_price_paid DOUBLE,
17 ssp_price_orig DOUBLE,
18 ssp_seqnum BIGINT,
19 ssp_revenue_source_id BIGINT,
20 mdsp_dt TIMESTAMP,
21 mdsp_request_id STRING,
22 mdsp_account_id BIGINT,
23 mdsp_campaign_id BIGINT,
24 mdsp_flight_id BIGINT,
25 top100_project_id BIGINT,
26 mdsp_ruid STRING
27)
28PARTITIONED BY (request_site_id string)
29STORED AS PARQUET;
30""")
31
32request_site_ids = [
33 432328744, 437727306, 432172862,
34 437707758, 437716356, 432190802,
35 437707674, 428678158
36]
37
38for site_id in request_site_ids:
39 spark.sql(f"""
40 ALTER TABLE businessanalytics.mdsp_ssp_match_test
41 ADD IF NOT EXISTS PARTITION (request_site_id={site_id})
42 """)
43
44
45df = spark.sql(f"""
46 SELECT
47 ssp.day AS ssp_day,
48 ssp.impression_id AS ssp_impression_id,
49 ssp.banner_id AS ssp_banner_id,
50 ssp.campaign_id AS ssp_campaign_id,
51 ssp.pad_id AS ssp_pad_id,
52 ssp.block_id AS ssp_block_id,
53 ssp.referer AS ssp_referer,
54 ssp.orig_pad_id AS ssp_orig_pad_id,
55 ssp.misc AS ssp_misc,
56 regexp_replace(ssp.visitor_uuid, '-', '') AS ssp_visitor_uuid,
57 ssp.rq_sess AS ssp_rq_sess,
58 ssp.orig_block_id AS ssp_orig_block_id,
59 ssp.uuid_status AS ssp_uuid_status,
60 ssp.price_paid AS ssp_price_paid,
61 ssp.price_orig AS ssp_price_orig,
62 ssp.seqnum AS ssp_seqnum,
63 ssp.revenue_source_id AS ssp_revenue_source_id,
64 mdsp.dt AS mdsp_dt,
65 mdsp.bid_request_id AS mdsp_request_id,
66 mdsp.adv_account_id AS mdsp_account_id,
67 mdsp.campaign_id AS mdsp_campaign_id,
68 mdsp.ad_group_id AS mdsp_flight_id,
69 mdsp.request_site_id AS mdsp_request_site_id,
70 case
71 when request_site_id in (432328744, 437727306) then 46589
72 when request_site_id in (432172862, 437707758) then 80674
73 when request_site_id in (437716356, 432190802) then 648840
74 when request_site_id in (437707674, 428678158) then 127846
75 else null
76 end as top100_project_id,
77 mdsp.gruid AS mdsp_ruid
78 FROM
79 businessanalytics.adtechstatistics_sspshowlog_cloud_test ssp
80 FULL JOIN
81 businessanalytics.mdsp_event_cloud_test mdsp
82 ON
83 ssp.pad_id = mdsp.request_site_id
84 AND regexp_replace(ssp.visitor_uuid, '-', '') = lower(mdsp.gruid)
85 AND ssp.impression_id = mdsp.bid_request_id
86 limit 10
87 """)
88
89
90df.write.format("parquet").mode("overwrite") \
91 .saveAsTable("businessanalytics.mdsp_ssp_match_test")