· 5 years ago · Apr 04, 2020, 12:48 PM
1add jar /opt/cloudera/parcels/CDH/lib/hive/lib/hive-contrib.jar;
2add jar /opt/cloudera/parcels/CDH/lib/hive/lib/hive-serde.jar;
3add jar /opt/cloudera/parcels/CDH/lib/hive/lib/json-serde-1.3.8-jar-with-dependencies.jar;
4
5CREATE DATABASE IF NOT EXISTS rodindm_kkt LOCATION '/user/hob2020051/task2';
6USE rodindm_kkt;
7
8DROP TABLE IF EXISTS kkt_document_json;
9CREATE EXTERNAL TABLE kkt_document_json (
10 `_id` STRING,
11 fsId STRING,
12 kktRegId STRING,
13 subtype STRING,
14 receiveDate STRING,
15 protocolVersion BIGINT,
16 ofdId STRING,
17 protocolSubversion BIGINT,
18 content struct<kktregid:STRING, userinn:STRING, totalsum:BIGINT>,
19 documentId BIGINT
20)
21 ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
22 WITH SERDEPROPERTIES ( "ignore.malformed.json" = "true" )
23 STORED AS TEXTFILE
24 LOCATION '/data/hive/fns2';
25
26
27DROP TABLE IF EXISTS task_2_table;
28CREATE TABLE task_2_table STORED AS TEXTFILE AS SELECT content.userinn as inn, content.totalsum as totalsum FROM kkt_document_json WHERE subtype=='receipt';
29
30SELECT inn, SUM (totalsum) AS tsum FROM task_2_table GROUP BY inn ORDER BY tsum DESC LIMIT 1;