import os
# Python interpreter for the PySpark workers and for the driver; both point at
# the same environment so driver and executors run the same Python.
os.environ['PYSPARK_PYTHON'] = "/u01/shared/tools/envs/tensor_2_1/bin/python3.6"
os.environ['PYSPARK_DRIVER_PYTHON'] = "/u01/shared/tools/envs/tensor_2_1/bin/python3.6"

# Arguments PySpark hands to spark-submit when it launches the JVM gateway.
# The Kudu connector jar must be passed with --jars: a bare jar path is taken
# by spark-submit as the application to run, not as an extra classpath entry,
# so the connector classes would never be loaded.
# NOTE(review): this artifact (kudu-spark_2.10, 1.5.0) looks like a
# Scala 2.10 / Kudu 1.5 build; for Spark 2.4 with Kudu 1.9 the matching jar is
# presumably kudu-spark2_2.11-1.9.0 -- confirm and swap the path if so.
os.environ['PYSPARK_SUBMIT_ARGS'] = (
    "--jars /home/v22fingerprintbda/FPTeam/Streams/kudu-spark_2.10-1.5.0.jar "
    "pyspark-shell"
)
import time
import findspark
# Point findspark at the Spark installation shipped in the CDH parcel; this
# has to run before the pyspark imports that follow.
findspark.init(
    spark_home='/opt/cloudera/parcels/CDH-6.2.1-1.cdh6.2.1.p0.1580995/lib/spark'
)
from pyspark import SparkContext, SQLContext, StorageLevel
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.functions import col,isnan,when,count
# Spark session running with the "local" master, registered as "MEDReader".
spark = (
    SparkSession.builder
    .master("local")
    .appName("MEDReader")
    .getOrCreate()
)

# Two sample rows; both the id and the name are plain strings.
sd = [
    ("1", "Ahmed"),
    ("2", "Emad"),
]
# Column names applied to the tuples above, in order.
sch = ["id", "name"]

# DataFrame that is written to Kudu further down.
kududf = spark.createDataFrame(sd, schema=sch)
#print("Starting KUDU .......")
# Kudu master RPC address(es) as "host:port" (comma-separated for several).
# `kuduMaster` was used below without ever being defined, which raised a
# NameError before any write was attempted. It is now read from the
# KUDU_MASTER environment variable; the fallback (local host, Kudu's default
# master port) is only a placeholder and should be overridden per cluster.
kuduMaster = os.environ.get("KUDU_MASTER", "localhost:7051")

# Append the rows of `kududf` to the Kudu table through the kudu-spark data
# source. An append-mode write does not create the table: the target table
# must already exist in Kudu.
# NOTE(review): tables created through Impala are usually registered in Kudu
# with a lower-case name (e.g. "impala::bde.fp_kudu_test") -- confirm the
# exact table name against the Kudu master.
(
    kududf.write
    .format("org.apache.kudu.spark.kudu")
    .option('kudu.master', kuduMaster)
    .option('kudu.table', "impala::bde.FP_KUDU_TEST")
    .mode("append")
    .save()
)
# Additional info: Spark version 2.4.0-cdh6.2.1, Kudu version 1.9.0-cdh6.2.1.