-- Pasted Sep 08, 2019, 12:44 PM
-- External table over raw JSON event logs stored in S3.
-- Only the columns listed here are exposed; add or remove columns to match
-- the fields you care about. The table is EXTERNAL, so dropping it does not
-- delete the underlying objects in S3.
CREATE EXTERNAL TABLE IF NOT EXISTS segment_logs.eventlogs (
    anonymousid string,               -- pick columns you care about!
    context     map<string,string>,   -- using a map for nested JSON
    messageid   string,
    `timestamp` timestamp,            -- reserved word in DDL, so it must be backtick-quoted
    type        string,
    userid      string,
    traits      map<string,string>,
    event       string
)
-- Partition by the axes you expect to query often; sourceid here is
-- associated with each source of data.
-- NOTE(review): partitions of an external table are not visible until they
-- are registered (e.g. ALTER TABLE ... ADD PARTITION or MSCK REPAIR TABLE) —
-- confirm how partitions are loaded for this bucket layout.
PARTITIONED BY (sourceid string)
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
-- Location of your data in S3.
LOCATION 's3://your-s3-bucket/segment-logs';