· 7 years ago · Oct 29, 2018, 03:32 AM
1for x in *; do hadoop fs -put $x /user/unravel/; done
2
3cd /opt/h*/driver_data
4for x in *; do hadoop fs -put $x /user/unravel/; done
5
6hive -e "create table temp_drivers (col_value STRING); load data inpath '/user/unravel/drivers.csv' OVERWRITE INTO TABLE temp_drivers;"
7hive -e "select * from temp_drivers;"
8
9hive -e "CREATE TABLE IF NOT EXISTS drivers (driverId INT, name STRING, ssn BIGINT, location STRING, certified STRING, wageplan STRING); insert overwrite table drivers SELECT regexp_extract(col_value, '^(?:([^,]*),?){1}', 1) driverId,regexp_extract(col_value, '^(?:([^,]*),?){2}', 1) name, regexp_extract(col_value, '^(?:([^,]*),?){3}', 1) ssn, regexp_extract(col_value, '^(?:([^,]*),?){4}', 1) location, regexp_extract(col_value, '^(?:([^,]*),?){5}', 1) certified, regexp_extract(col_value, '^(?:([^,]*),?){6}', 1) wageplan from temp_drivers;"
10hive -e " select * from drivers"
11
12
13hive -e "CREATE TABLE IF NOT EXISTS temp_timesheet (col_value string); LOAD DATA INPATH '/user/unravel/timesheet.csv' OVERWRITE INTO TABLE temp_timesheet;"
14hive -e "select * from temp_timesheet;"
15
16hive -e "CREATE TABLE IF NOT EXISTS timesheet (driverId INT, week INT, hours_logged INT , miles_logged INT); insert overwrite table timesheet SELECT regexp_extract(col_value, '^(?:([^,]*),?){1}', 1) driverId, regexp_extract(col_value, '^(?:([^,]*),?){2}', 1) week, regexp_extract(col_value, '^(?:([^,]*),?){3}', 1) hours_logged, regexp_extract(col_value, '^(?:([^,]*),?){4}', 1) miles_logged from temp_timesheet;"
17hive -e "select * from timesheet;"
18
19
20export WORKFLOW_NAME="Notif4MissedSLA"
21#export WORKFLOW_NAME="WealthMgmt"
22export UTC_TIME_STAMP=$(date -u '+%Y%m%dT%H%M%SZ')
23#export HS2='vego1.unraveldatalab.com'
24#export HS2='127.0.0.1'
25
26beeline -u "jdbc:hive2://$HS2:10000" -n hdfs --hiveconf unravel.workflow.name=${WORKFLOW_NAME} --hiveconf unravel.workflow.utctimestamp=${UTC_TIME_STAMP} -e "use default; SELECT d.driverId, d.name, t.total_hours, t.total_miles from drivers d JOIN (SELECT driverId, sum(hours_logged)total_hours, sum(miles_logged)total_miles FROM timesheet GROUP BY driverId ) t ON (d.driverId = t.driverId);"
27
28sleep 35
29
30beeline -u "jdbc:hive2://$HS2:10000" -n hdfs --hiveconf unravel.workflow.name=${WORKFLOW_NAME} --hiveconf unravel.workflow.utctimestamp=${UTC_TIME_STAMP} -e "use default; SELECT driverId, sum(hours_logged), sum(miles_logged) FROM timesheet GROUP BY driverId;"
31
32
33sleep 40
34#beeline -u "jdbc:hive2://$HS2:10000" -n hdfs --hiveconf unravel.workflow.name=${WORKFLOW_NAME} --hiveconf unravel.workflow.utctimestamp=${UTC_TIME_STAMP} -e "use tpcds_text_30; select count(*) from reason;"
35
36spark-submit --conf "spark.unravel.workflow.name=${WORKFLOW_NAME}" --conf "spark.unravel.workflow.utctimestamp=$UTC_TIME_STAMP" --conf "spark.eventLog.enabled=true" --class org.apache.spark.examples.SparkPi --master yarn-cluster /opt/cloudera/parcels/CDH/lib/spark/lib/spark-examples.jar 10 100