· 5 years ago · May 05, 2020, 02:14 PM
1SET hive.exec.dynamic.partition.mode=nonstrict;
2SET hive.exec.max.dynamic.partitions=2000;
3SET hive.exec.max.dynamic.partitions.pernode=500;
4USE chaladynalek;
5
6DROP TABLE IF EXISTS temp_statenames PURGE;
7
8CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS temp_statenames (
9id INT,
10name STRING,
11year STRING,
12gender CHAR(1),
13state STRING,
14count INT)
15
16ROW FORMAT DELIMITED
17FIELDS TERMINATED BY ','
18STORED AS TEXTFILE
19LOCATION '/chaladynalek/statenames'
20TBLPROPERTIES ('skip.border.line.count'='1');
21
22DROP TABLE IF EXISTS Statenames PURGE;
23
24CREATE TABLE Statenames (
25id INT,
26name STRING,
27gender CHAR(1),
28state STRING,
29count INT)
30
31 PARTITIONED BY(year STRING)
32 CLUSTERED BY(state) INTO 8 BUCKETS
33 STORED AS ORC;
34
35INSERT OVERWRITE TABLE Statenames PARTITION(year)
36
37SELECT id, name, gender, state, count, year
38FROM (SELECT name, year, count(*) AS count,
39 row_number() over (PARTITION BY year ORDER BY count(*) DESC) AS seqnum
40 FROM temp_statenames
41 GROUP BY name, year
42 ) yn
43WHERE seqnum = 1;