· 6 years ago · Jun 26, 2019, 05:42 PM
1DROP TABLE IF EXISTS movies.movierating;
2DROP TABLE IF EXISTS movies.list;
3DROP TABLE IF EXISTS movies.rating;
4DROP DATABASE IF EXISTS movies;
5
6ADD JAR /usr/local/hadoop/hive/hive/lib/RegexLoader.jar;
7
8CREATE DATABASE IF NOT EXISTS movies;
9
10CREATE EXTERNAL TABLE IF NOT EXISTS movies.list (id STRING, name STRING, genre STRING)
11ROW FORMAT SERDE 'com.cisco.hadoop.loaders.RegexSerDe'with SERDEPROPERTIES(
12 "input.regex"="^(.*)\:\:(.*)\:\:(.*)$",
13 "output.format.string"="%1$s %2$s %3$s");
14
15 CREATE EXTERNAL TABLE IF NOT EXISTS movies.rating (id STRING, userid STRING, rating STRING, timestamp STRING)
16 ROW FORMAT SERDE 'com.cisco.hadoop.loaders.RegexSerDe'
17 with SERDEPROPERTIES(
18 "input.regex"="^(.*)\:\:(.*)\:\:(.*)\:\:(.*)$",
19 "output.format.string"="%1$s %2$s %3$s %4$s");
20
21 LOAD DATA LOCAL INPATH 'ml-10M100K/movies.dat' into TABLE movies.list;
22 LOAD DATA LOCAL INPATH 'ml-10M100K/ratings.dat' into TABLE movies.rating;
23
24 CREATE TABLE movies.movierating(id STRING, name STRING, genre STRING, rating STRING);
25
26 INSERT OVERWRITE TABLE movies.movierating
27 SELECT list.id, list.name, list.genre, rating.rating from movies.list list LEFT JOIN movies.rating rating ON (list.id=rating.id) GROUP BY list.id;
28
29FAILED: SemanticException [Error 10002]: Line 4:21 Invalid column reference 'name'
30
31id|name |genre
32--+-----+------
3301|name1|comedy
3401|name2|horror
35
36select list.id, list.name, list.genre from list group by list.id;
37
38id|name |genre |rating
39--+-----+------+-------
4001|name1|comedy|'pretty good'
4101|name1|comedy|'bad'
4202|name2|horror|'9/10'
4303|name3|action|NULL
44
45ID | Movie Name | Genre
461 | Movie 1 | comedy
472 | movie 2 | action
483 | movie 3 | thriller
49
50MOVIE_ID | USER ID | RATING on 5 | TIMESTAMP
511 | xyz | 5 | 12345612
521 | abc | 4 | 23232312
532 | zvc | 1 | 12321123
542 | zyx | 2 | 12312312
55
56Movie ID | Movie Name | Genre | Rating Average
571 | Movie 1 | comedy | 4.5
582 | Movie 2 | action | 1.5
59
60CREATE TABLE movies.movierating(id STRING, name STRING, genre STRING, rating FLOAT);
61
62INSERT OVERWRITE TABLE movies.movierating
63SELECT list.id, list.name, list.genre, AVG(cast(rating.rating as FLOAT)) from movies.list list LEFT JOIN movies.rating rating ON (list.id=rating.id) GROUP BY list.id, list.name,list.genre order by list.id DESC;
64
65org.apache.hadoop.hive.ql.parse.SemanticException: Invalid column reference "charge_province"