· 5 years ago · May 05, 2020, 01:44 PM
1SET hive.exec.dynamic.partition.mode=nonstrict;
2SET hive.exec.max.dynamic.partitions=2000;
3SET hive.exec.max.dynamic.partitions.pernode=500;
4USE chaladynalek;
5
6DROP TABLE IF EXISTS temp_statenames PURGE;
7
8CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS temp_statenames (
9id INT,
10name STRING,
11year STRING,
12gender CHAR(1),
13state STRING,
14count INT)
15
16ROW FORMAT DELIMITED
17FIELDS TERMINATED BY ','
18STORED AS TEXTFILE
19LOCATION '/chaladynalek/statenames'
20TBLPROPERTIES ('skip.border.line.count'='1');
21
22DROP TABLE IF EXISTS Statenames PURGE;
23
24CREATE TABLE Statenames (
25id INT,
26name STRING,
27year STRING,
28gender CHAR(1),
29state STRING,
30count INT)
31
32 PARTITIONED BY(year)
33 CLUSTERED BY(state) INTO 8 BUCKETS
34 STORED AS ORC;
35
36INSERT OVERWRITE TABLE Statenames PARTITION(year)
37
38SELECT year, name
39FROM (SELECT year, name, count(*) AS cnt,
40 row_number() over (PARTITION BY year ORDER BY count(*) DESC) AS seqnum
41 FROM temp_statenames
42 GROUP BY year, name
43 ) yn
44WHERE seqnum = 1;