<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question [CDH 5.3] Spark-Hive integration issue in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/CDH-5-3-Spark-hive-integration-issue/m-p/24934#M23570</link>
    <description>Support Questions thread: [CDH 5.3] Spark-Hive integration issue.</description>
    <pubDate>Fri, 16 Sep 2022 09:22:17 GMT</pubDate>
    <dc:creator>tarekabouzeid91</dc:creator>
    <dc:date>2022-09-16T09:22:17Z</dc:date>
    <item>
      <title>[CDH 5.3] Spark-Hive integration issue</title>
      <link>https://community.cloudera.com/t5/Support-Questions/CDH-5-3-Spark-hive-integration-issue/m-p/24934#M23570</link>
      <description>&lt;P&gt;I am trying to use Hive from Spark: do a word count on a Flume stream and then save the result to a Hive database. As a first step I simply want to create a table if it doesn't exist, but I am getting this error:&lt;/P&gt;&lt;P&gt;2015-02-23 09:58:16,717 INFO [main] storage.BlockManagerMaster (Logging.scala:logInfo(59)) - Registered BlockManager&lt;BR /&gt;Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/hadoop/hive/conf/HiveConf&lt;BR /&gt;at WordCount$.main(WordCount.scala:46)&lt;BR /&gt;at WordCount.main(WordCount.scala)&lt;BR /&gt;at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)&lt;BR /&gt;at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)&lt;BR /&gt;at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)&lt;BR /&gt;at java.lang.reflect.Method.invoke(Method.java:606)&lt;BR /&gt;at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:358)&lt;BR /&gt;at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)&lt;BR /&gt;at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)&lt;BR /&gt;Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.hive.conf.HiveConf&lt;BR /&gt;at java.net.URLClassLoader$1.run(URLClassLoader.java:366)&lt;BR /&gt;at java.net.URLClassLoader$1.run(URLClassLoader.java:355)&lt;BR /&gt;at java.security.AccessController.doPrivileged(Native Method)&lt;BR /&gt;at java.net.URLClassLoader.findClass(URLClassLoader.java:354)&lt;BR /&gt;at java.lang.ClassLoader.loadClass(ClassLoader.java:425)&lt;BR /&gt;at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)&lt;BR /&gt;at java.lang.ClassLoader.loadClass(ClassLoader.java:358)&lt;BR /&gt;... 9 more&lt;/P&gt;&lt;P&gt;The exception is thrown on this line:&lt;/P&gt;&lt;PRE&gt;val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)&lt;/PRE&gt;&lt;P&gt;&lt;STRONG&gt;My code is:&lt;/STRONG&gt;&lt;/P&gt;&lt;PRE&gt;import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
import org.apache.spark._
import org.apache.spark.streaming._
import org.apache.spark.streaming.StreamingContext._
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.flume._
import org.apache.spark.util.IntParam
import org.apache.spark.sql._
import org.apache.spark.sql.SQLContext
import org.apache.hadoop.hive._
import org.apache.spark.sql.hive.HiveContext


/**
 * @author tabouzaid
 */
object WordCount {
  def main(args: Array[String]) {
    if (args.length &amp;lt; 2) {
      System.err.println(
        "Usage: WordCount &amp;lt;host&amp;gt; &amp;lt;port&amp;gt;")
      System.exit(1)
    }
    val Array(host, port) = args
    val batchInterval = Milliseconds(2000)
    val sparkConf = new SparkConf().setAppName("WordCount")
    val sc = new SparkContext(sparkConf)
    val ssc = new StreamingContext(sc, batchInterval)
    val stream = FlumeUtils.createStream(ssc, host, port.toInt)
    stream.count().map(cnt =&amp;gt; "Received !!!:::::" + cnt + " flume events." ).print()
    val body = stream.map(e =&amp;gt; new String(e.event.getBody.array))
    val counts = body.flatMap(line =&amp;gt; line.toLowerCase.replaceAll("[^a-zA-Z0-9\\s]", "").split("\\s+"))
                     .map(word =&amp;gt; (word, 1))
                     .reduceByKey(_ + _)
    val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)
    sqlContext.sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
    ssc.start()
    ssc.awaitTermination()
  }
}&lt;/PRE&gt;&lt;P&gt;My sbt build file for this project:&lt;/P&gt;&lt;PRE&gt;name := "WordCount"

version := "1.0"

scalaVersion := "2.10.4"

libraryDependencies += "org.apache.spark" %% "spark-core" % "1.2.0"
libraryDependencies += "org.apache.spark" % "spark-streaming_2.10" % "1.2.0"
libraryDependencies += "org.apache.spark" %% "spark-streaming-flume" % "1.2.0"
libraryDependencies += "org.apache.spark" % "spark-sql_2.10" % "1.2.0"
libraryDependencies += "org.apache.spark" % "spark-hive_2.10" % "1.2.0"

&lt;/PRE&gt;&lt;P&gt;The run command is:&lt;/P&gt;&lt;PRE&gt;sudo spark-submit --class "WordCount" --master local[*] --jars /usr/local/WordCount/target/scala-2.10/spark-streaming-flume_2.11-1.2.0.jar,/usr/lib/avro/avro-ipc-1.7.6-cdh5.3.0.jar,/usr/lib/flume-ng/lib/flume-ng-sdk-1.5.0-cdh5.3.0.jar,/usr/lib/hive/lib/hive-common-0.13.1-cdh5.3.0.jar,/usr/local/WordCount/target/scala-2.10/spark-hive_2.10-1.2.0-cdh5.3.0.jar,/usr/local/WordCount/target/scala-2.10/spark-sql_2.10-1.2.0.jar /usr/local/WordCount/target/scala-2.10/wordcount_2.10-1.0.jar 127.0.0.1 9999&lt;/PRE&gt;</description>
      <pubDate>Fri, 16 Sep 2022 09:22:17 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/CDH-5-3-Spark-hive-integration-issue/m-p/24934#M23570</guid>
      <dc:creator>tarekabouzeid91</dc:creator>
      <dc:date>2022-09-16T09:22:17Z</dc:date>
    </item>
    <item>
      <title>Re: [CDH 5.3] Spark-Hive integration issue</title>
      <link>https://community.cloudera.com/t5/Support-Questions/CDH-5-3-Spark-hive-integration-issue/m-p/25419#M23571</link>
      <description>&lt;P&gt;I have exactly the same problem in CDH 5.2.1.&lt;/P&gt;&lt;P&gt;Regards,&lt;/P&gt;</description>
      <pubDate>Mon, 09 Mar 2015 08:16:23 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/CDH-5-3-Spark-hive-integration-issue/m-p/25419#M23571</guid>
      <dc:creator>masfworld</dc:creator>
      <dc:date>2015-03-09T08:16:23Z</dc:date>
    </item>
    <item>
      <title>Re: [CDH 5.3] Spark-Hive integration issue</title>
      <link>https://community.cloudera.com/t5/Support-Questions/CDH-5-3-Spark-hive-integration-issue/m-p/25421#M23572</link>
      <description>&lt;P&gt;This is answered a few times already here. Have a look, for example, at &lt;A target="_blank" href="http://community.cloudera.com/t5/Advanced-Analytics-Apache-Spark/I-am-using-a-hive-cotext-in-pyspark-cdh5-3-virtual-box-and-i-get/m-p/24418#U24418"&gt;http://community.cloudera.com/t5/Advanced-Analytics-Apache-Spark/I-am-using-a-hive-cotext-in-pyspark-cdh5-3-virtual-box-and-i-get/m-p/24418#U24418&lt;/A&gt;.&lt;/P&gt;&lt;P&gt;The short answer is that Spark is not entirely compatible with the recent versions of Hive found in CDH, but it may still work for a lot of use cases. The Spark bits are still there; you have to add Hive to the classpath yourself.&lt;/P&gt;
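&lt;P&gt;For example, a spark-submit invocation along those lines could look roughly like the sketch below. This is only a sketch: it assumes the CDH package layout with the Hive client jars under /usr/lib/hive/lib, the HIVE_CP shell variable is purely illustrative, and the exact jar names vary by release.&lt;/P&gt;&lt;PRE&gt;# Collect the Hive client jars into a colon-separated classpath string
# (path assumes CDH package installs; parcel installs keep them elsewhere).
HIVE_CP=$(echo /usr/lib/hive/lib/*.jar | tr ' ' ':')

# Put the Hive jars on both the driver and the executor classpaths.
# The Flume/Avro --jars from the original command are omitted here for brevity.
spark-submit --class "WordCount" --master local[*] \
  --driver-class-path "$HIVE_CP" \
  --conf spark.executor.extraClassPath="$HIVE_CP" \
  /usr/local/WordCount/target/scala-2.10/wordcount_2.10-1.0.jar 127.0.0.1 9999&lt;/PRE&gt;</description>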
      <pubDate>Mon, 09 Mar 2015 10:02:07 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/CDH-5-3-Spark-hive-integration-issue/m-p/25421#M23572</guid>
      <dc:creator>srowen</dc:creator>
      <dc:date>2015-03-09T10:02:07Z</dc:date>
    </item>
    <item>
      <title>Re: [CDH 5.3] Spark-Hive integration issue</title>
      <link>https://community.cloudera.com/t5/Support-Questions/CDH-5-3-Spark-hive-integration-issue/m-p/53230#M23573</link>
      <description>Hi, do you have to add Hive to the classpath on every node?</description>
      <pubDate>Wed, 05 Apr 2017 17:07:52 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/CDH-5-3-Spark-hive-integration-issue/m-p/53230#M23573</guid>
      <dc:creator>Lleal</dc:creator>
      <dc:date>2017-04-05T17:07:52Z</dc:date>
    </item>
    <item>
      <title>Re: [CDH 5.3] Spark-Hive integration issue</title>
      <link>https://community.cloudera.com/t5/Support-Questions/CDH-5-3-Spark-hive-integration-issue/m-p/53235#M23574</link>
      <description>Yes, you have to.</description>
      <pubDate>Wed, 05 Apr 2017 17:45:39 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/CDH-5-3-Spark-hive-integration-issue/m-p/53235#M23574</guid>
      <dc:creator>tarekabouzeid91</dc:creator>
      <dc:date>2017-04-05T17:45:39Z</dc:date>
    </item>
  </channel>
</rss>

