· 6 years ago · Apr 20, 2019, 03:18 PM
1SET hive.exec.dynamic.partition.mode=nonstrict;
2SET hive.exec.max.dynamic.partitions=2000;
3SET hive.exec.max.dynamic.partitions.pernode=500;
4USE testdb;
5
6DROP TABLE IF EXISTS temp_Airline PURGE;
7
8CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS temp_Airline(
9 Year SMALLINT,
10 Month TINYINT,
11 DayOfMonth TINYINT,
12 DayOfWeek TINYINT,
13 DepTime SMALLINT,
14 CRSDepTime SMALLINT,
15 ArrTime SMALLINT,
16 CRSArrTime SMALLINT,
17 UniqueCarrier VARCHAR(7),
18 FlightNum SMALLINT,
19 TailNum STRING,
20 ActualElapsedTime SMALLINT,
21 CRSElapsedAirtime SMALLINT,
22 AirTime TINYINT,
23 ArrDelay SMALLINT,
24 DepDelay SMALLINT,
25 Origin CHAR(3),
26 Dest CHAR(3),
27 Distance INT,
28 TaxiIn SMALLINT,
29 TaxiOut SMALLINT,
30 Cancelled TINYINT,
31 CancellationCode STRING,
32 Diverted TINYINT,
33 CarrierDelay SMALLINT,
34 WeatherDelay SMALLINT,
35 NASDelay SMALLINT,
36 SecurityDelay SMALLINT,
37 LateAircraftDelay SMALLINT)
38 ROW FORMAT DELIMITED
39 FIELDS TERMINATED BY ','
40 STORED AS TEXTFILE
41 LOCATION '/input/airline'
42 TBLPROPERTIES ('skip.header.line.count'='1');
43
44DROP TABLE IF EXISTS Airline PURGE;
45
46CREATE TABLE Airline (
47 FlightNum SMALLINT,
48 TailNum STRING,
49 DepDate TIMESTAMP,
50 Delay SMALLINT,
51 Carrier VARCHAR(7))
52 COMMENT '2008 airlines'
53 PARTITIONED BY(Airport CHAR(3))
54 CLUSTERED BY(Carrier) INTO 8 BUCKETS
55 STORED AS ORC;
56
57INSERT OVERWRITE TABLE Airline PARTITION(Airport)
58 SELECT s.FlightNum, s.TailNum, CAST(PRINTF('%d-%02d-%02d %02.0f:%02d:00', s.Year, s.Month, s.DayOfMonth, (s.DepTime - 50) / 100, s.DepTime % 100) AS TIMESTAMP), IF(s.ArrDelay>0, s.ArrDelay, 0) as ArrDelay, s.UniqueCarrier, s.Dest
59 FROM temp_Airline s WHERE Cancelled=0 and Diverted=0;
60
61
62------------------------------------------------------------------------------------------------------------------------------------
63USE testdb;
64
65SELECT t.Carrier, t.Airport, t.AvgDelay FROM (
66 SELECT d.Carrier, d.Airport, d.AvgDelay, DENSE_RANK() OVER (PARTITION BY d.Airport ORDER BY d.AvgDelay DESC) AS Rank FROM
67 (SELECT Airport, Carrier, AVG(Delay) AS AvgDelay FROM Airline GROUP BY Airport, Carrier) as d) as t
68 WHERE t.Rank < 3 ORDER BY AvgDelay DESC;