· 7 years ago · Jan 13, 2019, 04:58 AM
1ADD JAR hdfs:///user/maria_dev/json-serde-1.3.8-jar-with-dependencies.jar;
2
3set hive.support.sql11.reserved.keywords=false;
4
5CREATE EXTERNAL TABLE if NOT EXISTS tweets (
6text STRING,
7entities STRUCT<
8 hashtags:ARRAY<STRUCT<text:STRING>>>,
9 user STRUCT<
10 screen_name:STRING,
11 friends_count:INT,
12 followers_count:INT,
13 location:STRING,
14 verified:BOOLEAN>
15
16)
17ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
18LOCATION '/user/hive/input/twitter';
19
20
21select distinct user.screen_name as name , user.followers_count as count
22from tweets
23where size(entities.hashtags) > 0
24and user.location like '%India%'
25order by count desc
26limit 5;