// Paste metadata: · 7 years ago · Dec 17, 2018, 05:20 PM
// Writes a DataFrame to a directory as ORC files, then registers an external
// Hive table (mydb.tbl1) over that directory with a column list derived from
// the DataFrame's schema.
// NOTE(review): assumes a SparkSession named `spark` is in scope (as in
// spark-shell) and that it can reach the Hive metastore -- confirm.

// TODO: replace ??? with the code that builds or loads the DataFrame to persist.
val myDF: org.apache.spark.sql.DataFrame = ???

// Directory that receives the ORC files; the external table points at it.
val tableLocation = "/hdfs/hive/your-directory"

// "overwrite" replaces whatever was previously stored at the location.
myDF.write.mode("overwrite").orc(tableLocation)

// Preferred DDL spellings for the common column types. Any type that is not
// listed here falls back to Spark's own catalog representation (see below).
val typeMapper = Map(
  DataTypes.StringType -> "String",
  DataTypes.FloatType -> "Float",
  DataTypes.DoubleType -> "Double",
  DataTypes.IntegerType -> "Integer",
  DataTypes.createArrayType(DataTypes.DoubleType, true) -> "Array<Double>",
  DataTypes.createArrayType(DataTypes.DoubleType, false) -> "Array<Double>"
)

// Backtick-quotes an identifier (doubling embedded backticks) so column names
// that are reserved words or contain special characters stay valid in the DDL.
def quoteIdentifier(name: String): String = "`" + name.replace("`", "``") + "`"

// One "name type" entry per schema field. Looking the type up with getOrElse
// (instead of applying the map directly) avoids a NoSuchElementException for
// column types missing from typeMapper; catalogString is the type string Spark
// stores in external catalogs.
val columnListing = myDF.schema
  .map { field =>
    val ddlType = typeMapper.getOrElse(field.dataType, field.dataType.catalogString)
    s"${quoteIdentifier(field.name)} $ddlType"
  }
  .mkString(",\n")

val createStatement =
  s"CREATE EXTERNAL TABLE IF NOT EXISTS mydb.tbl1 ($columnListing) STORED AS ORC LOCATION '$tableLocation'"

// Echo the generated DDL so it can be inspected or re-run by hand.
println(createStatement)
spark.sql(createStatement).show()