· 6 years ago · Jul 18, 2019, 03:40 PM
-- External table over Apache access-log files, parsed line by line with
-- Hive's RegexSerDe (Hive / Amazon Athena DDL).
--
-- * The nine capture groups of "input.regex" map positionally onto the nine
--   data columns below (host .. agent). The last two groups (referer, agent)
--   sit inside an optional non-capturing group, so lines without them still
--   match.
-- * Every data column is declared STRING; cast at query time
--   (e.g. CAST(status AS INT)) where a numeric value is needed.
-- * `database`, `user` and `time` are reserved words in Hive/Athena DDL and
--   therefore must be quoted with backticks. Identifiers are never quoted
--   with single quotes, which denote string literals.
-- * The table is partitioned by year/month/day/hour. Partitions of an
--   external table are not discovered automatically: register them with
--   ALTER TABLE ... ADD PARTITION or MSCK REPAIR TABLE before querying.
-- * NOTE(review): "output.format.string" is kept as in the original; it is
--   presumably ignored by the serde2 RegexSerDe (read-only) -- confirm before
--   relying on it.
-- * Replace `database` and <Bucket> with real values; the S3 location should
--   point at the log prefix and end with a trailing slash.
CREATE EXTERNAL TABLE IF NOT EXISTS `database`.apache_logs (
    host     STRING,
    identity STRING,
    `user`   STRING,
    `time`   STRING,
    request  STRING,
    status   STRING,
    size     STRING,
    referer  STRING,
    agent    STRING
)
PARTITIONED BY (
    year  INT,
    month INT,
    day   INT,
    hour  INT
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    "input.regex" = "([^ ]*) ([^ ]*) ([^ ]*) (-|\\[[^\\]]*\\]) ([^ \"]*|\"[^\"]*\") (-|[0-9]*) (-|[0-9]*)(?: ([^ \"]*|\"[^\"]*\") ([^ \"]*|\"[^\"]*\"))?",
    "output.format.string" = "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s"
)
LOCATION 's3://<Bucket>'