· 6 years ago · May 06, 2019, 04:48 AM
1创建 HIVE 表
2```sql
3CREATE EXTERNAL TABLE IF NOT EXISTS cloudfront_logs (
4 DateObject Date,
5 Time STRING,
6 Location STRING,
7 Bytes INT,
8 RequestIP STRING,
9 Method STRING,
10 Host STRING,
11 Uri STRING,
12 Status INT,
13 Referrer STRING,
14 OS String,
15 Browser String,
16 BrowserVersion String
17)
18ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
19WITH SERDEPROPERTIES (
20 "input.regex" = "^(?!#)([^ ]+)\\s+([^ ]+)\\s+([^ ]+)\\s+([^ ]+)\\s+([^ ]+)\\s+([^ ]+)\\s+([^ ]+)\\s+([^ ]+)\\s+([^ ]+)\\s+([^ ]+)\\s+[^\(]+[\(]([^\;]+).*\%20([^\/]+)[\/](.*)$"
21) LOCATION 's3://cn-northwest-1.elasticmapreduce.samples/cloudfront/data';
22
23```
24
25执行查询
26```sql
27INSERT OVERWRITE DIRECTORY 's3://<bucket-name>/hive/cloudfront_etl/'
28SELECT os, COUNT(*) count
29FROM cloudfront_logs WHERE dateobject BETWEEN '2014-07-05' AND '2014-08-05' GROUP BY os;
30```