· 7 years ago · Oct 19, 2018, 01:22 AM
1USE x_ggoudeau;
2
3SET hive.exec.parallel=true;
4
5-- used to establish reasonable thresholds
6CREATE TABLE IF NOT EXISTS a_commerce_auction_information_statistics (
7
8 auction_agg_stats STRUCT<
9 nondistinct_auction_id_content_id: INT,
10 provider_repeated_in_auction: INT,
11 price_missing_currency: INT
12 >,
13
14 impression_agg_stats STRUCT<
15 nondistinct_auction_id_content_id: INT,
16 null_offer_slot: STRUCT<global: INT, hcom: INT, bcom: INT, agoda: INT, expedia: INT>
17 >,
18
19 left_join_agg_stats STRUCT<
20 row_count_difference_from_auction_agg: INT
21 >,
22
23 filtered_impression_agg_stats STRUCT<
24 rows_with_all_offers_below_plm: STRUCT<global: INT, hcom: INT, bcom: INT, agoda: INT, expedia: INT>
25 >,
26
27 -- {
28 -- "impressions_coverage": { "global": DOUBLE, large providers DOUBLE },
29 -- "clicks_coverage": { "global": DOUBLE, large providers DOUBLE },
30 -- "impressions_count": { "global": INT, large providers INT },
31 -- "clicks_count": { "global": INT, large providers INT },
32 -- "weighted_average_bid": { "global": DOUBLE, large providers DOUBLE },
33 -- "null_audience_id_count": { "global": INT, large providers INT },
34 -- "average_meet_rate": { "global": DOUBLE, large providers DOUBLE },
35 -- "average_beat_rate": { "global": DOUBLE, large providers DOUBLE },
36 -- "weighted_average_price_delta": { "global": DOUBLE, large providers DOUBLE },
37 -- "average_not_available_rate": { "global": DOUBLE, large providers DOUBLE },
38 -- "average_maybe_available_rate": { "global": DOUBLE, large providers DOUBLE },
39 -- }
40 final_agg_stats STRING,
41
42 -- {
43 -- "independent":
44 -- {
45 -- "los_outside_range": INT,
46 -- "check_in_date_in_past": INT,
47 -- "dta_outside_range": INT,
48 -- "invalid_default_date": INT,
49 -- "display_rank_outside_range": INT
50 -- },
51 -- "inter_column":
52 -- {
53 -- "invalid_price_stats": INT,
54 -- "nonzero_cost_with_zero_clicks": INT,
55 -- "nonzero_clicks_with_zero_cost": INT,
56 -- "clicks_and_cost_with_no_display_rank": INT,
57 -- "price_stats_but_not_available": INT
58 -- }
59 -- }
60 row_level_stats STRING
61)
62PARTITIONED BY (ds STRING);
63
64CREATE TABLE IF NOT EXISTS a_commerce_auction_information_validation_failures (
65 failure_message STRING
66)
67PARTITIONED BY (ds STRING);
68
69-- auction agg CTEs
70WITH current_meta_auctions_agg AS (
71 SELECT * FROM a_commerce_ai_meta_auctions_in_hr WHERE ds = '2018-10-10'
72),
73total_auction_count AS (
74 SELECT COUNT(*) AS total_num FROM current_meta_auctions_agg
75),
76nondistinct_auction_content_ids_count AS (
77 SELECT COUNT(*) AS nondistinct_count FROM (
78 SELECT auction_id, content_id, COUNT(*) FROM current_meta_auctions_agg GROUP BY auction_id, content_id HAVING COUNT(*) > 1
79 ) t
80),
81duplicated_providers_in_auction AS (
82 SELECT COUNT(*) AS duplicated_count FROM (
83 SELECT auction_id, provider_id, COUNT(*) FROM current_meta_auctions_agg
84 GROUP BY auction_id, provider_id HAVING COUNT(*) > 1
85 ) t
86),
87price_missing_currency_count AS (
88 SELECT SUM(CASE WHEN display_price IS NOT NULL AND currency IS NULL THEN 1 ELSE 0 END) AS missing_count FROM current_meta_auctions_agg
89),
90
91-- impressions agg CTEs
92current_impressions_agg AS (
93 SELECT * FROM a_commerce_ai_impressions_in_hr WHERE ds = '2018-10-10'
94),
95nondistinct_impression_auction_content_ids_count AS (
96 SELECT COUNT(*) AS nondistinct_count FROM (
97 SELECT auction_id, content_id, COUNT(*) FROM current_impressions_agg GROUP BY auction_id, content_id HAVING COUNT(*) > 1
98 ) t
99),
100impression_entries_with_provider_with_null_offer_slot AS (
101 SELECT cia.*, auc.provider_id
102 FROM current_impressions_agg cia
103 JOIN current_meta_auctions_agg auc ON cia.auction_id = auc.auction_id
104 WHERE offer_slot IS NULL
105),
106null_offer_slot_count AS (
107 SELECT
108 global_count.count AS global,
109 hcom_count.count AS hcom,
110 agoda_count.count AS agoda,
111 bcom_count.count AS bcom,
112 expedia_count.count AS expedia
113 FROM (SELECT COUNT(*) AS count FROM impression_entries_with_provider_with_null_offer_slot) global_count
114 JOIN (SELECT COUNT(*) AS count FROM impression_entries_with_provider_with_null_offer_slot WHERE provider_id = 8512) hcom_count
115 JOIN (SELECT COUNT(*) AS count FROM impression_entries_with_provider_with_null_offer_slot WHERE provider_id = 8054) agoda_count
116 JOIN (SELECT COUNT(*) AS count FROM impression_entries_with_provider_with_null_offer_slot WHERE provider_id = 8049) expedia_count
117 JOIN (SELECT COUNT(*) AS count FROM impression_entries_with_provider_with_null_offer_slot WHERE provider_id = 7994) bcom_count
118),
119
120-- left join agg CTEs
121current_left_join_agg AS (
122 SELECT * FROM a_commerce_ai_auctions_left_join_impressions WHERE ds = '2018-10-10'
123),
124
125-- filtered impressions CTEs
126current_filtered_impressions_agg AS (
127 SELECT provider_id, all_offers_below_plm FROM a_commerce_ai_auction_impression_filtered WHERE ds = '2018-10-10'
128),
129filtered_impression_rows_all_below_plm AS (
130 SELECT provider_id FROM current_filtered_impressions_agg WHERE all_offers_below_plm
131),
132all_below_plm_with_provider AS (
133 SELECT
134 global_count.count AS global,
135 hcom_count.count AS hcom,
136 agoda_count.count AS agoda,
137 bcom_count.count AS bcom,
138 expedia_count.count AS expedia
139 FROM (SELECT COUNT(*) AS count FROM filtered_impression_rows_all_below_plm) global_count
140 JOIN (SELECT COUNT(*) AS count FROM filtered_impression_rows_all_below_plm WHERE provider_id = 8512) hcom_count
141 JOIN (SELECT COUNT(*) AS count FROM filtered_impression_rows_all_below_plm WHERE provider_id = 8054) agoda_count
142 JOIN (SELECT COUNT(*) AS count FROM filtered_impression_rows_all_below_plm WHERE provider_id = 8049) expedia_count
143 JOIN (SELECT COUNT(*) AS count FROM filtered_impression_rows_all_below_plm WHERE provider_id = 7994) bcom_count
144),
145
146-- final agg CTEs
147current_final_agg AS (
148 SELECT * FROM a_commerce_ai_daily_aggregation WHERE ds = '2018-10-10'
149),
150
151-- build the final stat structs
152auction_agg_stats_cte AS (
153 SELECT named_struct(
154 'nondistinct_auction_id_content_id', CAST(nondistinct_auction_content_ids_count.nondistinct_count AS INT),
155 'provider_repeated_in_auction', CAST(duplicated_providers_in_auction.duplicated_count AS INT),
156 'price_missing_currency', CAST(price_missing_currency_count.missing_count AS INT)
157 ) AS stats
158 FROM total_auction_count
159 JOIN nondistinct_auction_content_ids_count
160 JOIN duplicated_providers_in_auction
161 JOIN price_missing_currency_count
162),
163impression_agg_stats_cte AS (
164 SELECT named_struct(
165 'nondistinct_auction_id_content_id', CAST(nondistinct_impression_auction_content_ids_count.nondistinct_count AS INT),
166 'null_offer_slot', named_struct(
167 'global', CAST(null_offer_slot_count.global AS INT),
168 'hcom', CAST(null_offer_slot_count.hcom AS INT),
169 'bcom', CAST(null_offer_slot_count.bcom AS INT),
170 'agoda', CAST(null_offer_slot_count.agoda AS INT),
171 'expedia', CAST(null_offer_slot_count.expedia AS INT))
172 ) AS stats
173 FROM nondistinct_impression_auction_content_ids_count
174 JOIN null_offer_slot_count
175),
176left_join_agg_stats_cte AS (
177 SELECT named_struct(
178 'row_count_difference_from_auction_agg', CAST(total_auction_count.total_num - left_join_count.total_num AS INT)
179 ) AS stats
180 FROM total_auction_count
181 JOIN (SELECT COUNT(*) total_num FROM current_left_join_agg) left_join_count
182),
183filtered_agg_stats_cte AS (
184 SELECT named_struct('rows_with_all_offers_below_plm', named_struct(
185 'global', CAST(all_below_plm_with_provider.global AS INT),
186 'hcom', CAST(all_below_plm_with_provider.hcom AS INT),
187 'bcom', CAST(all_below_plm_with_provider.bcom AS INT),
188 'agoda', CAST(all_below_plm_with_provider.agoda AS INT),
189 'expedia', CAST(all_below_plm_with_provider.expedia AS INT))
190 ) AS stats
191 FROM all_below_plm_with_provider
192)
193INSERT OVERWRITE TABLE a_commerce_auction_information_statistics PARTITION (ds = '2018-10-10')
194SELECT
195 auction_agg_stats_cte.stats AS auction_agg_stats,
196 impression_agg_stats_cte.stats AS impression_agg_stats,
197 left_join_agg_stats_cte.stats AS left_join_agg_stats,
198 filtered_agg_stats_cte.stats AS filtered_impression_agg_stats,
199 null AS final_agg_stats,
200 null AS row_level_stats
201FROM
202 auction_agg_stats_cte
203 JOIN impression_agg_stats_cte
204 JOIN left_join_agg_stats_cte
205 JOIN filtered_agg_stats_cte
206;