· 6 years ago · Jan 08, 2020, 09:56 AM
1#!/bin/bash
2# PRZETWARZANIE etap1
3# CREATE etap1 database
4hive -e "Create DATABASE etap1"
5
6# CREATE reduced name_basics table
7hive -e "CREATE TABLE IF NOT EXISTS etap1.name_basics AS SELECT nconst, primaryName FROM default.name_basics;"
8
9# CREATE reduced TitleBasics
10hive -e "CREATE TABLE IF NOT EXISTS etap1.title_basics AS SELECT tconst, titleType, startYear, endYear FROM default.title_basics;"
11
12# CREATE JOINED title_ratings_years
13hive -e "CREATE TABLE IF NOT EXISTS etap1.title_ratings_years AS SELECT title_ratings.tconst as tconst, title_ratings.averagerating as averagerating, title_ratings.numvotes as numvotes, title_basics.startyear as startyear FROM title_ratings LEFT JOIN title_basics ON title_ratings.tconst = title_basics.tconst;"
14
15# Rozbijanie po spacjach (potrzebne do atomizecrew)
16# SELECT tconst, uniqueDirectors as director, writers FROM (
17# SELECT tconst, split(directors, '\,') as directors, writers FROM default.title_crew P
18# ) a lateral view explode(a.directors) exploded as uniqueDirectors;
19
20# Title_crew z ominieciem atomizecrew
21"CREATE TABLE IF NOT EXISTS etap1.title_crew AS SELECT tconst, nconst, CASE category WHEN "director" THEN true ELSE false END as isDirector, CASE category WHEN "actor" THEN true ELSE false END as isActor FROM default.title_principals WHERE category = 'director' OR category = 'actor';"