· 5 years ago · Aug 03, 2020, 10:58 AM
1package com.bilibili.manga.etl
2
3import com.bilibili.manga.etl.common.spark.date.LogDate
4import com.bilibili.manga.etl.common.spark.raw.RawReader.{RawData, getRaws}
5import org.apache.spark.sql.SparkSession
6import org.apache.spark.sql.functions.desc
7import org.json4s._
8import org.json4s.jackson.JsonMethods._
9
10import scala.concurrent._
11import duration._
12
13//category : getbytag ep rank topic price
14
/**
 * Spark batch job that flattens raw Kuaikan "price" payloads into all-string rows and
 * appends them to a Hive table.
 *
 * Pipeline (see `main`): read the raw rows, keep one row per `raw_key`, parse the JSON
 * payload of every remaining row into a [[KuaikanPriceDetailParser.Result]], create the
 * target table if it does not exist yet, and insert the parsed rows.
 */
object KuaikanPriceDetailParser {

  /** json4s formats shared by every `extract*` / `render` call in this object. */
  private implicit val jsonFormats: Formats = DefaultFormats

  /** Name of the temporary helper column used while deduplicating in `main`. */
  private val RowNumberColumn = "__kk_dedup_rn"

  /**
   * Job entry point.
   *
   * @param args command-line arguments; not read by this method
   */
  def main(args: Array[String]): Unit = {
    // Local imports keep this object self-contained; the file header only imports `desc`.
    import org.apache.spark.sql.expressions.Window
    import org.apache.spark.sql.functions.{col, row_number}

    val spark: SparkSession = SparkSession.builder()
      .appName("kuaikan_price_parse_job")
      .enableHiveSupport()
      .getOrCreate()
    import spark.implicits._
    import spark.sql

    try {
      // Read the raw records.
      val raws = getRaws(spark, "ods.hydralisk_kuaikan_raw_data", "price")

      // Deduplicate: keep the row with the largest `id` for every `raw_key`.
      // `sort(...).dropDuplicates(...)` does not guarantee which row of a group survives,
      // so the winner is selected explicitly with a window function instead.
      val newestFirst = Window.partitionBy("raw_key").orderBy(desc("id"))
      val duplicated = raws
        .withColumn(RowNumberColumn, row_number().over(newestFirst))
        .where(col(RowNumberColumn) === 1)
        .drop(RowNumberColumn)
        .as[RawData]
      println("duplicated count:" + duplicated.count)

      // Transform. Cached because the rows are counted and then written, which would
      // otherwise run the read/dedup/parse pipeline twice.
      val res = duplicated.map(r => parseResult(r)).cache()
      println("res count:" + res.count)

      // Create the downstream table if it does not exist yet.
      val tableName = "tmp_manga.dwd_competitor_kuaikan_ep_df"
      sql(createSql(tableName))

      // Write. `insertInto` matches columns by position, so the field order of `Result`
      // has to stay aligned with the DDL returned by `createSql`.
      res.write.insertInto(tableName)
      println("insert end")
    } finally {
      // Release the session even when one of the steps above fails.
      spark.stop()
    }
  }

  /**
   * One flattened output row. Every column is kept as a string.
   *
   * The field order mirrors the column order of the DDL in `createSql`, with the
   * partition column `log_date` last. `batch_purchase_list` is not parsed yet and is
   * therefore absent from both the row and the table.
   */
  case class Result(
    comic_id: String,
    kk_currency_balance: String,
    is_auto_pay: String,
    default_bucket: String,
    entire_preferential: String,
    coupon_coupon_count: String,
    pic_text_banner: String,
    auto_pay_reminder_show: String,
    auto_pay_reminder_selected: String,
    adv_view_has_adv: String,
    adv_view_switch_buy_btn_text: String,
    adv_banner: String,
    pic_text_banner_id: String,
    pic_text_banner_text1: String,
    pic_text_banner_text2: String,
    pic_text_banner_pic: String,
    pic_text_banner_last_update_time: String,
    pic_text_banner_text_type: String,
    action_target_action_type: String,
    action_target_target_id: String,
    action_target_target_app_url: String,
    action_target_target_web_url: String,
    action_target_target_package_name: String,
    action_target_target_title: String,
    action_target_hybrid_url: String,
    action_target_target_guide: String,
    action_target_target_guide_name: String,
    action_target_non_iap_supported: String,
    pic_text_banner_adv_info: String,
    pic_text_banner_personality_hit_result: String,
    bubble_text: String,
    top_banner: String,
    vip_info: String,
    toast_text_batch_to_card: String,
    toast_text_card_to_single: String,
    autoPay: String,
    raw_id: String,
    log_date: String
  )

  /**
   * Builds the `create table if not exists` statement for the output table.
   *
   * @param table fully qualified table name substituted into the statement
   * @return the DDL text: all-string columns, partitioned by `log_date`, stored as ORC
   */
  def createSql(table: String): String = {
    """
      |create table if not exists %s
      |(
      |comic_id String COMMENT'',
      |kk_currency_balance String COMMENT'',
      |is_auto_pay String COMMENT'',
      |default_bucket String COMMENT'',
      |entire_preferential String COMMENT'',
      |coupon_coupon_count String COMMENT'',
      |pic_text_banner String COMMENT'',
      |auto_pay_reminder_show String COMMENT'',
      |auto_pay_reminder_selected String COMMENT'',
      |-- batch_purchase_list String COMMENT'',
      |adv_view_has_adv String COMMENT'',
      |adv_view_switch_buy_btn_text String COMMENT'',
      |adv_banner String COMMENT'',
      |pic_text_banner_id String COMMENT'',
      |pic_text_banner_text1 String COMMENT'',
      |pic_text_banner_text2 String COMMENT'',
      |pic_text_banner_pic String COMMENT'',
      |pic_text_banner_last_update_time String COMMENT'',
      |pic_text_banner_text_type String COMMENT'',
      |action_target_action_type String COMMENT'',
      |action_target_target_id String COMMENT'',
      |action_target_target_app_url String COMMENT'',
      |action_target_target_web_url String COMMENT'',
      |action_target_target_package_name String COMMENT'',
      |action_target_target_title String COMMENT'',
      |action_target_hybrid_url String COMMENT'',
      |action_target_target_guide String COMMENT'',
      |action_target_target_guide_name String COMMENT'',
      |action_target_non_iap_supported String COMMENT'',
      |pic_text_banner_adv_info String COMMENT'',
      |pic_text_banner_personality_hit_result String COMMENT'',
      |bubble_text String COMMENT'',
      |top_banner String COMMENT'',
      |vip_info String COMMENT'',
      |toast_text_batch_to_card String COMMENT'',
      |toast_text_card_to_single String COMMENT'',
      |autoPay String COMMENT'',
      |raw_id String comment ''
      |)
      |COMMENT '快看漫画DWD层章节价格信息'
      |partitioned by (log_date STRING)
      |STORED AS orc
      |""".stripMargin.format(table)
  }

  /**
   * Parses the JSON payload of one raw record into a flat [[Result]].
   *
   * All values are read from the payload's top-level `data` node. Missing or null values
   * become `""`. Scalars are converted to their string form. Values that are themselves
   * JSON objects or arrays of non-scalars are stored as compact JSON text; previously they
   * silently became `""` because they cannot be extracted as a plain string.
   *
   * Parsing is fail-fast: a payload that is not valid JSON makes `parse` throw.
   *
   * @param rawData raw record; `json_data` holds the payload and `id` becomes `raw_id`
   * @return the flattened row, with `log_date` taken from `LogDate.get()`
   */
  def parseResult(rawData: RawData): Result = {
    val item = parse(rawData.json_data) \ "data"

    // Walks `path` key by key below `data` and renders whatever is found there.
    def text(path: String*): String =
      asText(path.foldLeft(item)((node, key) => node \ key))

    // Renders the list-valued field `key` found directly below `data`.
    def joined(key: String): String = asJoinedText(item \ key)

    Result(
      comic_id = text("comic_id"),
      kk_currency_balance = text("kk_currency_balance"),
      is_auto_pay = text("is_auto_pay"),
      default_bucket = text("default_bucket"),
      entire_preferential = text("entire_preferential"),
      coupon_coupon_count = text("coupon", "coupon_count"),
      pic_text_banner = text("pic_text_banner"),
      auto_pay_reminder_show = text("auto_pay_reminder", "show"),
      auto_pay_reminder_selected = text("auto_pay_reminder", "selected"),
      // batch_purchase_list: not parsed yet
      adv_view_has_adv = text("adv_view", "has_adv"),
      adv_view_switch_buy_btn_text = text("adv_view", "switch_buy_btn_text"),
      adv_banner = joined("adv_banner"),
      pic_text_banner_id = text("pic_text_banner", "id"),
      pic_text_banner_text1 = text("pic_text_banner", "text1"),
      pic_text_banner_text2 = text("pic_text_banner", "text2"),
      pic_text_banner_pic = text("pic_text_banner", "pic"),
      pic_text_banner_last_update_time = text("pic_text_banner", "last_update_time"),
      pic_text_banner_text_type = text("pic_text_banner", "text_type"),
      // NOTE(review): `action_target` is looked up directly below `data`; confirm it is not
      // nested inside `pic_text_banner` in the real payload.
      action_target_action_type = text("action_target", "action_type"),
      action_target_target_id = text("action_target", "target_id"),
      action_target_target_app_url = text("action_target", "target_app_url"),
      action_target_target_web_url = text("action_target", "target_web_url"),
      action_target_target_package_name = text("action_target", "target_package_name"),
      action_target_target_title = text("action_target", "target_title"),
      action_target_hybrid_url = text("action_target", "hybrid_url"),
      action_target_target_guide = text("action_target", "target_guide"),
      action_target_target_guide_name = text("action_target", "target_guide_name"),
      action_target_non_iap_supported = text("action_target", "non_iap_supported"),
      pic_text_banner_adv_info = text("pic_text_banner", "adv_info"),
      pic_text_banner_personality_hit_result = text("pic_text_banner", "personality_hit_result"),
      bubble_text = joined("bubble_text"),
      top_banner = joined("top_banner"),
      vip_info = text("vip_info"),
      toast_text_batch_to_card = text("toast_text", "batch_to_card"),
      toast_text_card_to_single = text("toast_text", "card_to_single"),
      autoPay = text("autoPay"),
      raw_id = rawData.id.toString,
      // NOTE(review): evaluated once per record wherever this method runs; presumably it
      // returns the same value for the whole job - confirm against LogDate.
      log_date = LogDate.get()
    )
  }

  /** True when `node` is a JSON object or array, i.e. not extractable as a plain string. */
  private def isContainer(node: JValue): Boolean = node match {
    case _: JObject | _: JArray => true
    case _ => false
  }

  /** Renders a node as text: compact JSON for objects/arrays, string extraction otherwise. */
  private def asText(node: JValue): String =
    if (isContainer(node)) compact(render(node))
    else node.extractOrElse[String]("")

  /**
   * Renders a list-valued node: elements joined with "," when they are scalars, compact
   * JSON when the array holds nested objects or arrays.
   */
  private def asJoinedText(node: JValue): String = node match {
    case JArray(elements) if elements.exists(isContainer) => compact(render(node))
    case _ => node.extractOrElse[List[String]](List.empty).mkString(",")
  }
}
183
184
185
186
187
188
189
190
191
192