· 7 years ago · Jan 14, 2019, 01:26 PM
1-- Summary table having URI and dte
2
3-- HIVE CONF
4set hive.exec.dynamic.partition.mode=nonstrict;
5
6-- external parameters
7-- set RUN_DATE=2019-01-05;
8-- USAGE:
9-- hive -hiveconf RUN_DATE='2019-01-05' -f summary.hql
10
11CREATE EXTERNAL TABLE IF NOT EXISTS prod.summary_access_log(
12 uri STRING,
13 count INT)
14 PARTITIONED BY (dte STRING)
15STORED AS PARQUET
16LOCATION 's3://nikita-ds-playground/prod/summary_access_log/';
17
18
19INSERT OVERWRITE TABLE prod.summary_access_log PARTITION(dte)
20 SELECT
21 uri,
22 COUNT(uri) as count,
23 dte
24 FROM prod.clean_access_log
25 WHERE dte = '${hiveconf:RUN_DATE}'
26 GROUP BY dte,uri ;