I'm working on a Spark program that loads data into a Hive table.
package com.sparktohive.load

import org.apache.spark.sql.{SaveMode, SparkSession}

object SparkToHive {
  // One row of the input file; defined at object level (not inside main)
  // so that toDF() can derive a schema for it.
  case class partc(id: Int, name: String, salary: Int, dept: String, location: String)

  def main(args: Array[String]): Unit = {
    val warehouseLocation = "file:${system:user.dir}/spark-warehouse" // currently unused

    val sparkSession = SparkSession.builder
      .master("local[2]")
      .appName("Saving data into HiveTable using Spark")
      .enableHiveSupport()
      .config("hive.exec.dynamic.partition", "true")
      .config("hive.exec.dynamic.partition.mode", "nonstrict")
      .config("hive.metastore.warehouse.dir", "/user/hive/warehouse")
      .getOrCreate()

    // Required for the implicit RDD-to-DataFrame conversions such as toDF().
    import sparkSession.implicits._

    // Read the raw text file, split each line on commas,
    // and map the fields onto the case class.
    val partfile = sparkSession.sparkContext.textFile("partfile")
    val partdata = partfile.map(part => part.split(","))
    val partRDD = partdata.map(line => partc(line(0).toInt, line(1), line(2).toInt, line(3), line(4)))

    // Convert to a DataFrame and append into the existing Hive table.
    val partDF = partRDD.toDF()
    partDF.write.mode(SaveMode.Append).insertInto("parttab")
  }
}
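In case it helps, partfile is a plain comma-separated text file with five columns matching the case class; a couple of made-up sample lines:

1,John,26000,IT,Chicago
2,Maria,38000,HR,Boston

And parttab is a Hive table that already exists (insertInto requires the target table to exist); it was created with a statement along these lines:

sparkSession.sql("CREATE TABLE IF NOT EXISTS parttab (id INT, name STRING, salary INT, dept STRING, location STRING)")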
But I am getting an error at this line:
val partDF = partRDD.toDF()
saying:
value toDF is not a member of org.apache.spark.rdd.RDD[com.sparktohive.load.SparkToHive.partc]
Could anyone tell me how I can fix it?