<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question I can not write in KUDU table using Pyspark in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/I-can-not-write-in-KUDU-table-using-Pyspark/m-p/338701#M232903</link>
    <description>&lt;P&gt;&lt;STRONG&gt;Error&lt;/STRONG&gt;&lt;/P&gt;
&lt;PRE&gt;Exception &lt;SPAN class="hljs-keyword"&gt;in&lt;/SPAN&gt; thread &lt;SPAN class="hljs-string"&gt;"main"&lt;/SPAN&gt; org.apache.spark.SparkException: No main &lt;SPAN class="hljs-keyword"&gt;class&lt;/SPAN&gt; &lt;SPAN class="hljs-title class_"&gt;set&lt;/SPAN&gt; &lt;SPAN class="hljs-keyword"&gt;in&lt;/SPAN&gt; JAR; please specify one &lt;SPAN class="hljs-keyword"&gt;with&lt;/SPAN&gt; --&lt;SPAN class="hljs-keyword"&gt;class&lt;/SPAN&gt;
        &lt;SPAN class="hljs-title class_"&gt;at&lt;/SPAN&gt; org.apache.spark.deploy.SparkSubmitArguments.error(SparkSubmitArguments.scala:&lt;SPAN class="hljs-number"&gt;657&lt;/SPAN&gt;)
        at org.apache.spark.deploy.SparkSubmitArguments.validateSubmitArguments(SparkSubmitArguments.scala:&lt;SPAN class="hljs-number"&gt;266&lt;/SPAN&gt;)
        at org.apache.spark.deploy.SparkSubmitArguments.validateArguments(SparkSubmitArguments.scala:&lt;SPAN class="hljs-number"&gt;251&lt;/SPAN&gt;)
        at org.apache.spark.deploy.SparkSubmitArguments.&amp;lt;init&amp;gt;(SparkSubmitArguments.scala:&lt;SPAN class="hljs-number"&gt;120&lt;/SPAN&gt;)
        at org.apache.spark.deploy.SparkSubmit$$anon$&lt;SPAN class="hljs-number"&gt;2&lt;/SPAN&gt;$$anon$&lt;SPAN class="hljs-number"&gt;1.&lt;/SPAN&gt;&amp;lt;init&amp;gt;(SparkSubmit.scala:&lt;SPAN class="hljs-number"&gt;913&lt;/SPAN&gt;)
        at org.apache.spark.deploy.SparkSubmit$$anon$&lt;SPAN class="hljs-number"&gt;2.&lt;/SPAN&gt;parseArguments(SparkSubmit.scala:&lt;SPAN class="hljs-number"&gt;913&lt;/SPAN&gt;)
        at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:&lt;SPAN class="hljs-number"&gt;81&lt;/SPAN&gt;)
        at org.apache.spark.deploy.SparkSubmit$$anon$&lt;SPAN class="hljs-number"&gt;2.&lt;/SPAN&gt;doSubmit(SparkSubmit.scala:&lt;SPAN class="hljs-number"&gt;926&lt;/SPAN&gt;)
        at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:&lt;SPAN class="hljs-number"&gt;935&lt;/SPAN&gt;)
        at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)&lt;/PRE&gt;
&lt;P&gt;&lt;STRONG&gt;Kudu table&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Moawad_0-1647357036451.png" style="width: 400px;"&gt;&lt;img src="https://community.cloudera.com/t5/image/serverpage/image-id/33947i9F3069BECA10EB68/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Moawad_0-1647357036451.png" alt="Moawad_0-1647357036451.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;Pyspark code&lt;/STRONG&gt;&lt;/P&gt;
&lt;DIV class="s-prose js-post-body"&gt;
&lt;PRE&gt;&lt;SPAN class="hljs-keyword"&gt;import&lt;/SPAN&gt; os
os.environ[&lt;SPAN class="hljs-string"&gt;'PYSPARK_PYTHON'&lt;/SPAN&gt;]=&lt;SPAN class="hljs-string"&gt;"/u01/shared/tools/envs/tensor_2_1/bin/python3.6"&lt;/SPAN&gt;
os.environ[&lt;SPAN class="hljs-string"&gt;'PYSPARK_DRIVER_PYTHON'&lt;/SPAN&gt;]=&lt;SPAN class="hljs-string"&gt;"/u01/shared/tools/envs/tensor_2_1/bin/python3.6"&lt;/SPAN&gt;
os.environ[&lt;SPAN class="hljs-string"&gt;'PYSPARK_SUBMIT_ARGS'&lt;/SPAN&gt;] = &lt;SPAN class="hljs-string"&gt;"/home/v22fingerprintbda/FPTeam/Streams/kudu-spark_2.10-1.5.0.jar pyspark-shell"&lt;/SPAN&gt; 

&lt;SPAN class="hljs-keyword"&gt;import&lt;/SPAN&gt; time 
&lt;SPAN class="hljs-keyword"&gt;import&lt;/SPAN&gt; findspark
findspark.init(&lt;SPAN class="hljs-string"&gt;'/opt/cloudera/parcels/CDH-6.2.1-1.cdh6.2.1.p0.1580995/lib/spark'&lt;/SPAN&gt;)
&lt;SPAN class="hljs-keyword"&gt;from&lt;/SPAN&gt; pyspark &lt;SPAN class="hljs-keyword"&gt;import&lt;/SPAN&gt; SparkContext, SQLContext, StorageLevel
&lt;SPAN class="hljs-keyword"&gt;from&lt;/SPAN&gt; pyspark.sql &lt;SPAN class="hljs-keyword"&gt;import&lt;/SPAN&gt; SparkSession
&lt;SPAN class="hljs-keyword"&gt;from&lt;/SPAN&gt; pyspark.sql &lt;SPAN class="hljs-keyword"&gt;import&lt;/SPAN&gt; functions &lt;SPAN class="hljs-keyword"&gt;as&lt;/SPAN&gt; F
&lt;SPAN class="hljs-keyword"&gt;from&lt;/SPAN&gt; pyspark.sql.functions &lt;SPAN class="hljs-keyword"&gt;import&lt;/SPAN&gt; col,isnan,when,count

spark = SparkSession.builder.master(&lt;SPAN class="hljs-string"&gt;"local"&lt;/SPAN&gt;).appName(&lt;SPAN class="hljs-string"&gt;"MEDReader"&lt;/SPAN&gt;).getOrCreate()

sd = [(&lt;SPAN class="hljs-string"&gt;"1"&lt;/SPAN&gt;, &lt;SPAN class="hljs-string"&gt;"Ahmed"&lt;/SPAN&gt;),
      (&lt;SPAN class="hljs-string"&gt;"2"&lt;/SPAN&gt;, &lt;SPAN class="hljs-string"&gt;"Emad"&lt;/SPAN&gt;)]

sch = [&lt;SPAN class="hljs-string"&gt;"id"&lt;/SPAN&gt;, &lt;SPAN class="hljs-string"&gt;"name"&lt;/SPAN&gt;]

kududf = spark.createDataFrame(data=sd, schema=sch)

&lt;SPAN class="hljs-comment"&gt;#print("Starting KUDU .......")&lt;/SPAN&gt;

&lt;SPAN class="hljs-comment"&gt;# Create a table on KUDU&lt;/SPAN&gt;
kududf.write \
      .&lt;SPAN class="hljs-built_in"&gt;format&lt;/SPAN&gt;(&lt;SPAN class="hljs-string"&gt;"org.apache.kudu.spark.kudu"&lt;/SPAN&gt;) \
      .option(&lt;SPAN class="hljs-string"&gt;'kudu.master'&lt;/SPAN&gt;,kuduMaster)\
      .option(&lt;SPAN class="hljs-string"&gt;'kudu.table'&lt;/SPAN&gt;,&lt;SPAN class="hljs-string"&gt;"impala::bde.FP_KUDU_TEST"&lt;/SPAN&gt;) \
      .mode(&lt;SPAN class="hljs-string"&gt;"append"&lt;/SPAN&gt;) \
      .save()&lt;/PRE&gt;
&lt;P&gt;&lt;STRONG&gt;Additional info&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;spark version 2.4.0-cdh6.2.1 kudu 1.9.0-cdh6.2.1&lt;/P&gt;
&lt;/DIV&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Tue, 15 Mar 2022 20:19:28 GMT</pubDate>
    <dc:creator>Moawad</dc:creator>
    <dc:date>2022-03-15T20:19:28Z</dc:date>
    <item>
      <title>I can not write in KUDU table using Pyspark</title>
      <link>https://community.cloudera.com/t5/Support-Questions/I-can-not-write-in-KUDU-table-using-Pyspark/m-p/338701#M232903</link>
      <description>&lt;P&gt;&lt;STRONG&gt;Error&lt;/STRONG&gt;&lt;/P&gt;
&lt;PRE&gt;Exception &lt;SPAN class="hljs-keyword"&gt;in&lt;/SPAN&gt; thread &lt;SPAN class="hljs-string"&gt;"main"&lt;/SPAN&gt; org.apache.spark.SparkException: No main &lt;SPAN class="hljs-keyword"&gt;class&lt;/SPAN&gt; &lt;SPAN class="hljs-title class_"&gt;set&lt;/SPAN&gt; &lt;SPAN class="hljs-keyword"&gt;in&lt;/SPAN&gt; JAR; please specify one &lt;SPAN class="hljs-keyword"&gt;with&lt;/SPAN&gt; --&lt;SPAN class="hljs-keyword"&gt;class&lt;/SPAN&gt;
        &lt;SPAN class="hljs-title class_"&gt;at&lt;/SPAN&gt; org.apache.spark.deploy.SparkSubmitArguments.error(SparkSubmitArguments.scala:&lt;SPAN class="hljs-number"&gt;657&lt;/SPAN&gt;)
        at org.apache.spark.deploy.SparkSubmitArguments.validateSubmitArguments(SparkSubmitArguments.scala:&lt;SPAN class="hljs-number"&gt;266&lt;/SPAN&gt;)
        at org.apache.spark.deploy.SparkSubmitArguments.validateArguments(SparkSubmitArguments.scala:&lt;SPAN class="hljs-number"&gt;251&lt;/SPAN&gt;)
        at org.apache.spark.deploy.SparkSubmitArguments.&amp;lt;init&amp;gt;(SparkSubmitArguments.scala:&lt;SPAN class="hljs-number"&gt;120&lt;/SPAN&gt;)
        at org.apache.spark.deploy.SparkSubmit$$anon$&lt;SPAN class="hljs-number"&gt;2&lt;/SPAN&gt;$$anon$&lt;SPAN class="hljs-number"&gt;1.&lt;/SPAN&gt;&amp;lt;init&amp;gt;(SparkSubmit.scala:&lt;SPAN class="hljs-number"&gt;913&lt;/SPAN&gt;)
        at org.apache.spark.deploy.SparkSubmit$$anon$&lt;SPAN class="hljs-number"&gt;2.&lt;/SPAN&gt;parseArguments(SparkSubmit.scala:&lt;SPAN class="hljs-number"&gt;913&lt;/SPAN&gt;)
        at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:&lt;SPAN class="hljs-number"&gt;81&lt;/SPAN&gt;)
        at org.apache.spark.deploy.SparkSubmit$$anon$&lt;SPAN class="hljs-number"&gt;2.&lt;/SPAN&gt;doSubmit(SparkSubmit.scala:&lt;SPAN class="hljs-number"&gt;926&lt;/SPAN&gt;)
        at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:&lt;SPAN class="hljs-number"&gt;935&lt;/SPAN&gt;)
        at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)&lt;/PRE&gt;
&lt;P&gt;&lt;STRONG&gt;Kudu table&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Moawad_0-1647357036451.png" style="width: 400px;"&gt;&lt;img src="https://community.cloudera.com/t5/image/serverpage/image-id/33947i9F3069BECA10EB68/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Moawad_0-1647357036451.png" alt="Moawad_0-1647357036451.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;Pyspark code&lt;/STRONG&gt;&lt;/P&gt;
&lt;DIV class="s-prose js-post-body"&gt;
&lt;PRE&gt;&lt;SPAN class="hljs-keyword"&gt;import&lt;/SPAN&gt; os
os.environ[&lt;SPAN class="hljs-string"&gt;'PYSPARK_PYTHON'&lt;/SPAN&gt;]=&lt;SPAN class="hljs-string"&gt;"/u01/shared/tools/envs/tensor_2_1/bin/python3.6"&lt;/SPAN&gt;
os.environ[&lt;SPAN class="hljs-string"&gt;'PYSPARK_DRIVER_PYTHON'&lt;/SPAN&gt;]=&lt;SPAN class="hljs-string"&gt;"/u01/shared/tools/envs/tensor_2_1/bin/python3.6"&lt;/SPAN&gt;
os.environ[&lt;SPAN class="hljs-string"&gt;'PYSPARK_SUBMIT_ARGS'&lt;/SPAN&gt;] = &lt;SPAN class="hljs-string"&gt;"/home/v22fingerprintbda/FPTeam/Streams/kudu-spark_2.10-1.5.0.jar pyspark-shell"&lt;/SPAN&gt; 

&lt;SPAN class="hljs-keyword"&gt;import&lt;/SPAN&gt; time 
&lt;SPAN class="hljs-keyword"&gt;import&lt;/SPAN&gt; findspark
findspark.init(&lt;SPAN class="hljs-string"&gt;'/opt/cloudera/parcels/CDH-6.2.1-1.cdh6.2.1.p0.1580995/lib/spark'&lt;/SPAN&gt;)
&lt;SPAN class="hljs-keyword"&gt;from&lt;/SPAN&gt; pyspark &lt;SPAN class="hljs-keyword"&gt;import&lt;/SPAN&gt; SparkContext, SQLContext, StorageLevel
&lt;SPAN class="hljs-keyword"&gt;from&lt;/SPAN&gt; pyspark.sql &lt;SPAN class="hljs-keyword"&gt;import&lt;/SPAN&gt; SparkSession
&lt;SPAN class="hljs-keyword"&gt;from&lt;/SPAN&gt; pyspark.sql &lt;SPAN class="hljs-keyword"&gt;import&lt;/SPAN&gt; functions &lt;SPAN class="hljs-keyword"&gt;as&lt;/SPAN&gt; F
&lt;SPAN class="hljs-keyword"&gt;from&lt;/SPAN&gt; pyspark.sql.functions &lt;SPAN class="hljs-keyword"&gt;import&lt;/SPAN&gt; col,isnan,when,count

spark = SparkSession.builder.master(&lt;SPAN class="hljs-string"&gt;"local"&lt;/SPAN&gt;).appName(&lt;SPAN class="hljs-string"&gt;"MEDReader"&lt;/SPAN&gt;).getOrCreate()

sd = [(&lt;SPAN class="hljs-string"&gt;"1"&lt;/SPAN&gt;, &lt;SPAN class="hljs-string"&gt;"Ahmed"&lt;/SPAN&gt;),
      (&lt;SPAN class="hljs-string"&gt;"2"&lt;/SPAN&gt;, &lt;SPAN class="hljs-string"&gt;"Emad"&lt;/SPAN&gt;)]

sch = [&lt;SPAN class="hljs-string"&gt;"id"&lt;/SPAN&gt;, &lt;SPAN class="hljs-string"&gt;"name"&lt;/SPAN&gt;]

kududf = spark.createDataFrame(data=sd, schema=sch)

&lt;SPAN class="hljs-comment"&gt;#print("Starting KUDU .......")&lt;/SPAN&gt;

&lt;SPAN class="hljs-comment"&gt;# Create a table on KUDU&lt;/SPAN&gt;
kududf.write \
      .&lt;SPAN class="hljs-built_in"&gt;format&lt;/SPAN&gt;(&lt;SPAN class="hljs-string"&gt;"org.apache.kudu.spark.kudu"&lt;/SPAN&gt;) \
      .option(&lt;SPAN class="hljs-string"&gt;'kudu.master'&lt;/SPAN&gt;,kuduMaster)\
      .option(&lt;SPAN class="hljs-string"&gt;'kudu.table'&lt;/SPAN&gt;,&lt;SPAN class="hljs-string"&gt;"impala::bde.FP_KUDU_TEST"&lt;/SPAN&gt;) \
      .mode(&lt;SPAN class="hljs-string"&gt;"append"&lt;/SPAN&gt;) \
      .save()&lt;/PRE&gt;
&lt;P&gt;&lt;STRONG&gt;Additional info&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;spark version 2.4.0-cdh6.2.1 kudu 1.9.0-cdh6.2.1&lt;/P&gt;
&lt;/DIV&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 15 Mar 2022 20:19:28 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/I-can-not-write-in-KUDU-table-using-Pyspark/m-p/338701#M232903</guid>
      <dc:creator>Moawad</dc:creator>
      <dc:date>2022-03-15T20:19:28Z</dc:date>
    </item>
    <item>
      <title>Re: I can not write in KUDU table using Pyspark</title>
      <link>https://community.cloudera.com/t5/Support-Questions/I-can-not-write-in-KUDU-table-using-Pyspark/m-p/338968#M232984</link>
      <description>&lt;P&gt;Hello&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/96574"&gt;@Moawad&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks for using Cloudera Community. Based on the post, your team is having issues connecting to Kudu via PySpark.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Kindly confirm whether a simple example, as documented in the CDH 6.2.x guide [1] or in the upstream Apache Kudu documentation [2], works for your team.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Regards, Smarak&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;[1]&amp;nbsp;&lt;A href="https://docs.cloudera.com/documentation/enterprise/6/6.2/topics/kudu_development.html" target="_blank"&gt;https://docs.cloudera.com/documentation/enterprise/6/6.2/topics/kudu_development.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;[2]&amp;nbsp;&lt;A href="https://kudu.apache.org/docs/developing.html" target="_blank"&gt;https://kudu.apache.org/docs/developing.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 21 Mar 2022 03:51:04 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/I-can-not-write-in-KUDU-table-using-Pyspark/m-p/338968#M232984</guid>
      <dc:creator>smdas</dc:creator>
      <dc:date>2022-03-21T03:51:04Z</dc:date>
    </item>
    <item>
      <title>Re: I can not write in KUDU table using Pyspark</title>
      <link>https://community.cloudera.com/t5/Support-Questions/I-can-not-write-in-KUDU-table-using-Pyspark/m-p/339888#M233203</link>
      <description>&lt;P&gt;Hello&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/96574"&gt;@Moawad&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Hope you are doing well. Kindly let us know whether the post on 03/20, which listed a few links from the CDH v6.x documentation, helped your team.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Regards, Smarak&lt;/P&gt;</description>
      <pubDate>Tue, 29 Mar 2022 08:17:27 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/I-can-not-write-in-KUDU-table-using-Pyspark/m-p/339888#M233203</guid>
      <dc:creator>smdas</dc:creator>
      <dc:date>2022-03-29T08:17:27Z</dc:date>
    </item>
  </channel>
</rss>

