<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Run Spark App Error in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Run-Spark-App-Error/m-p/17884#M2743</link>
    <description>&lt;P&gt;This is a conflict between the version of Guava that Spark uses and the version used by Hadoop. How are you packaging your app? And can you run it with spark-submit? Using spark-submit tends to take care of this conflict.&lt;/P&gt;</description>
    <pubDate>Fri, 29 Aug 2014 22:54:25 GMT</pubDate>
    <dc:creator>srowen</dc:creator>
    <dc:date>2014-08-29T22:54:25Z</dc:date>
    <item>
      <title>Run Spark App Error</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Run-Spark-App-Error/m-p/17870#M2742</link>
      <description>&lt;P&gt;Case 1: when use java command line to run:&lt;/P&gt;&lt;P&gt;#$JAVA_HOME/bin/java -cp $CLASSPATH -Dspark.master=spark://10.xxx.xxx.xxx:43191 com.cloudera.sparkwordcount.SparkWordCount hdfs://xxxxxx.com:8020/user/hdfs/spark/LICENSE 2&lt;BR /&gt;&lt;BR /&gt;I got java.lang.NoSuchMethodError: com.google.common.HashFunction.hasInt(I)Lcom...&lt;BR /&gt;&lt;BR /&gt;14/08/29 18:37:16 INFO spark.SecurityManager: Changing view acls to: root&lt;BR /&gt;14/08/29 18:37:16 INFO spark.SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(root)&lt;BR /&gt;14/08/29 18:37:17 INFO slf4j.Slf4jLogger: Slf4jLogger started&lt;BR /&gt;14/08/29 18:37:17 INFO Remoting: Starting remoting&lt;BR /&gt;14/08/29 18:37:17 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://spark@Exxxxy-head.amers1.ciscloud:52049]&lt;BR /&gt;14/08/29 18:37:17 INFO Remoting: Remoting now listens on addresses: [akka.tcp://spark@Exxxxxx.ciscloud:52049]&lt;BR /&gt;14/08/29 18:37:17 INFO spark.SparkEnv: Registering MapOutputTracker&lt;BR /&gt;14/08/29 18:37:17 INFO spark.SparkEnv: Registering BlockManagerMaster&lt;BR /&gt;14/08/29 18:37:17 INFO storage.DiskBlockManager: Created local directory at /tmp/spark-local-20140829183717-3369&lt;BR /&gt;14/08/29 18:37:17 INFO storage.MemoryStore: MemoryStore started with capacity 2.0 GB.&lt;BR /&gt;14/08/29 18:37:17 INFO network.ConnectionManager: Bound socket to port 45604 with id = ConnectionManagerId(xxxxx,45604)&lt;BR /&gt;14/08/29 18:37:17 INFO storage.BlockManagerMaster: Trying to register BlockManager&lt;BR /&gt;14/08/29 18:37:17 INFO storage.BlockManagerInfo: Registering block manager ETSInterDay-head.amers1.ciscloud:45604 with 2.0 GB RAM&lt;BR /&gt;14/08/29 18:37:17 INFO storage.BlockManagerMaster: Registered BlockManager&lt;BR /&gt;14/08/29 18:37:17 INFO spark.HttpServer: Starting HTTP Server&lt;BR /&gt;14/08/29 18:37:17 INFO server.Server: jetty-8.y.z-SNAPSHOT&lt;BR 
/&gt;14/08/29 18:37:17 INFO server.AbstractConnector: Started SocketConnector@0.0.0.0:54315&lt;BR /&gt;14/08/29 18:37:17 INFO broadcast.HttpBroadcast: Broadcast server started at &lt;A href="http://xxxxx:54315" target="_blank"&gt;http://xxxxx:54315&lt;/A&gt;&lt;BR /&gt;14/08/29 18:37:17 INFO spark.HttpFileServer: HTTP File server directory is /tmp/spark-19eccd14-bc32-4112-9e97-2197e059456b&lt;BR /&gt;14/08/29 18:37:17 INFO spark.HttpServer: Starting HTTP Server&lt;BR /&gt;14/08/29 18:37:17 INFO server.Server: jetty-8.y.z-SNAPSHOT&lt;BR /&gt;14/08/29 18:37:17 INFO server.AbstractConnector: Started SocketConnector@0.0.0.0:50747&lt;BR /&gt;14/08/29 18:37:18 INFO server.Server: jetty-8.y.z-SNAPSHOT&lt;BR /&gt;14/08/29 18:37:18 INFO server.AbstractConnector: Started SelectChannelConnector@0.0.0.0:4040&lt;BR /&gt;14/08/29 18:37:18 INFO ui.SparkUI: Started SparkUI at &lt;A href="http://xxxd:4040" target="_blank"&gt;http://xxxd:4040&lt;/A&gt;&lt;BR /&gt;14/08/29 18:37:18 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... 
using builtin-java classes where applicable&lt;BR /&gt;14/08/29 18:37:18 INFO client.AppClient$ClientActor: Connecting to master spark://1xxxx...&lt;BR /&gt;14/08/29 18:37:18 WARN storage.BlockManager: Putting block broadcast_0 failed&lt;BR /&gt;Exception in thread "main" java.lang.NoSuchMethodError: com.google.common.hash.HashFunction.hashInt(I)Lcom/google/common/hash/HashCode;&lt;BR /&gt;at org.apache.spark.util.collection.OpenHashSet.org$apache$spark$util$collection$OpenHashSet$$hashcode(OpenHashSet.scala:261)&lt;BR /&gt;at org.apache.spark.util.collection.OpenHashSet$mcI$sp.getPos$mcI$sp(OpenHashSet.scala:165)&lt;BR /&gt;at org.apache.spark.util.collection.OpenHashSet$mcI$sp.contains$mcI$sp(OpenHashSet.scala:102)&lt;BR /&gt;at org.apache.spark.util.SizeEstimator$$anonfun$visitArray$2.apply$mcVI$sp(SizeEstimator.scala:214)&lt;BR /&gt;at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:141)&lt;BR /&gt;at org.apache.spark.util.SizeEstimator$.visitArray(SizeEstimator.scala:210)&lt;BR /&gt;at org.apache.spark.util.SizeEstimator$.visitSingleObject(SizeEstimator.scala:169)&lt;BR /&gt;at org.apache.spark.util.SizeEstimator$.org$apache$spark$util$SizeEstimator$$estimate(SizeEstimator.scala:161)&lt;BR /&gt;at org.apache.spark.util.SizeEstimator$.estimate(SizeEstimator.scala:155)&lt;BR /&gt;at org.apache.spark.storage.MemoryStore.putValues(MemoryStore.scala:75)&lt;BR /&gt;at org.apache.spark.storage.MemoryStore.putValues(MemoryStore.scala:92)&lt;BR /&gt;at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:661)&lt;BR /&gt;at org.apache.spark.storage.BlockManager.put(BlockManager.scala:546)&lt;BR /&gt;at org.apache.spark.storage.BlockManager.putSingle(BlockManager.scala:812)&lt;BR /&gt;at org.apache.spark.broadcast.HttpBroadcast.&amp;lt;init&amp;gt;(HttpBroadcast.scala:52)&lt;BR /&gt;at org.apache.spark.broadcast.HttpBroadcastFactory.newBroadcast(HttpBroadcastFactory.scala:35)&lt;BR /&gt;at 
org.apache.spark.broadcast.HttpBroadcastFactory.newBroadcast(HttpBroadcastFactory.scala:29)&lt;BR /&gt;at org.apache.spark.broadcast.BroadcastManager.newBroadcast(BroadcastManager.scala:62)&lt;BR /&gt;at org.apache.spark.SparkContext.broadcast(SparkContext.scala:776)&lt;BR /&gt;at org.apache.spark.SparkContext.hadoopFile(SparkContext.scala:545)&lt;BR /&gt;at org.apache.spark.SparkContext.textFile(SparkContext.scala:457)&lt;BR /&gt;at com.cloudera.sparkwordcount.SparkWordCount$.main(SparkWordCount.scala:17)&lt;BR /&gt;at com.cloudera.sparkwordcount.SparkWordCount.main(SparkWordCount.scala)&lt;BR /&gt;&lt;BR /&gt;Case 2&lt;BR /&gt;======&lt;BR /&gt;When use:&lt;BR /&gt;./spark-submit --class com.cloudera.sparkwordcount.SparkWordCount --master spark://xx.xxx.xxx.xxx:43191 /hadoop/cloudera/parcels/CDH/lib/spark/m/wordcount/target/sparkwordcount-0.0.1-SNAPSHOT.jar hdfs://xxxx.xxx.xxx:8020//user//hdfs//spark//LICENSE 2&lt;BR /&gt;&lt;BR /&gt;I got:&lt;BR /&gt;&lt;BR /&gt;14/08/29 18:41:59 INFO client.AppClient$ClientActor: Executor updated: app-20140829184159-0005/0 is now RUNNING&lt;BR /&gt;14/08/29 18:41:59 INFO client.AppClient$ClientActor: Executor updated: app-20140829184159-0005/1 is now RUNNING&lt;BR /&gt;14/08/29 18:41:59 INFO mapred.FileInputFormat: Total input paths to process : 1&lt;BR /&gt;14/08/29 18:42:00 INFO spark.SparkContext: Starting job: collect at SparkWordCount.scala:28&lt;BR /&gt;14/08/29 18:42:00 INFO scheduler.DAGScheduler: Registering RDD 4 (reduceByKey at SparkWordCount.scala:20)&lt;BR /&gt;14/08/29 18:42:00 INFO scheduler.DAGScheduler: Registering RDD 10 (reduceByKey at SparkWordCount.scala:26)&lt;BR /&gt;14/08/29 18:42:00 INFO scheduler.DAGScheduler: Got job 0 (collect at SparkWordCount.scala:28) with 2 output partitions (allowLocal=false)&lt;BR /&gt;14/08/29 18:42:00 INFO scheduler.DAGScheduler: Final stage: Stage 0(collect at SparkWordCount.scala:28)&lt;BR /&gt;14/08/29 18:42:00 INFO scheduler.DAGScheduler: Parents of final stage: 
List(Stage 1)&lt;BR /&gt;14/08/29 18:42:00 INFO scheduler.DAGScheduler: Missing parents: List(Stage 1)&lt;BR /&gt;14/08/29 18:42:00 INFO scheduler.DAGScheduler: Submitting Stage 2 (MapPartitionsRDD[4] at reduceByKey at SparkWordCount.scala:20), which has no missing parents&lt;BR /&gt;14/08/29 18:42:00 INFO scheduler.DAGScheduler: Submitting 2 missing tasks from Stage 2 (MapPartitionsRDD[4] at reduceByKey at SparkWordCount.scala:20)&lt;BR /&gt;14/08/29 18:42:00 INFO scheduler.TaskSchedulerImpl: Adding task set 2.0 with 2 tasks&lt;BR /&gt;14/08/29 18:42:01 INFO cluster.SparkDeploySchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@xxx:44712/user/Executor#-1200084333] with ID 1&lt;BR /&gt;14/08/29 18:42:01 INFO scheduler.TaskSetManager: Starting task 2.0:0 as TID 0 on executor 1: ETSInterDay-worker1.amers1.ciscloud (PROCESS_LOCAL)&lt;BR /&gt;14/08/29 18:42:01 INFO scheduler.TaskSetManager: Serialized task 2.0:0 as 2192 bytes in 2 ms&lt;BR /&gt;14/08/29 18:42:01 INFO scheduler.TaskSetManager: Starting task 2.0:1 as TID 1 on executor 1: ETSInterDay-worker1.amers1.ciscloud (PROCESS_LOCAL)&lt;BR /&gt;14/08/29 18:42:01 INFO scheduler.TaskSetManager: Serialized task 2.0:1 as 2192 bytes in 0 ms&lt;BR /&gt;14/08/29 18:42:01 INFO storage.BlockManagerInfo: Registering block manager ETSInterDay-worker1.amers1.ciscloud:41977 with 294.9 MB RAM&lt;BR /&gt;14/08/29 18:42:01 INFO cluster.SparkDeploySchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@xxxxx:49084/user/Executor#593030937] with ID 0&lt;BR /&gt;14/08/29 18:42:02 INFO storage.BlockManagerInfo: Registering block managerxxxxxx:55303 with 294.9 MB RAM&lt;BR /&gt;14/08/29 18:42:02 WARN scheduler.TaskSetManager: Lost TID 0 (task 2.0:0)&lt;BR /&gt;14/08/29 18:42:02 WARN scheduler.TaskSetManager: Loss was due to java.lang.RuntimeException&lt;BR /&gt;java.lang.RuntimeException: java.io.IOException: No FileSystem for scheme: hdfs&lt;BR /&gt;at 
org.apache.hadoop.mapred.JobConf.getWorkingDirectory(JobConf.java:657)&lt;BR /&gt;at org.apache.hadoop.mapred.FileInputFormat.setInputPaths(FileInputFormat.java:389)&lt;BR /&gt;at org.apache.hadoop.mapred.FileInputFormat.setInputPaths(FileInputFormat.java:362)&lt;BR /&gt;at org.apache.spark.SparkContext$$anonfun$22.apply(SparkContext.scala:546)&lt;BR /&gt;at org.apache.spark.SparkContext$$anonfun$22.apply(SparkContext.scala:546)&lt;BR /&gt;at org.apache.spark.rdd.HadoopRDD$$anonfun$getJobConf$1.apply(HadoopRDD.scala:145)&lt;BR /&gt;at org.apache.spark.rdd.HadoopRDD$$anonfun$getJobConf$1.apply(HadoopRDD.scala:145)&lt;BR /&gt;at scala.Option.map(Option.scala:145)&lt;BR /&gt;at org.apache.spark.rdd.HadoopRDD.getJobConf(HadoopRDD.scala:145)&lt;BR /&gt;at org.apache.spark.rdd.HadoopRDD$$anon$1.&amp;lt;init&amp;gt;(HadoopRDD.scala:189)&lt;BR /&gt;at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:184)&lt;BR /&gt;at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:93)&lt;BR /&gt;at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)&lt;BR /&gt;at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)&lt;BR /&gt;at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31)&lt;BR /&gt;at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)&lt;BR /&gt;at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)&lt;BR /&gt;at org.apache.spark.rdd.FlatMappedRDD.compute(FlatMappedRDD.scala:33)&lt;BR /&gt;at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)&lt;BR /&gt;at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)&lt;BR /&gt;at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31)&lt;BR /&gt;at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)&lt;BR /&gt;at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)&lt;BR /&gt;at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)&lt;BR /&gt;at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)&lt;BR /&gt;at 
org.apache.spark.rdd.RDD.iterator(RDD.scala:229)&lt;BR /&gt;at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:158)&lt;BR /&gt;at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)&lt;BR /&gt;at org.apache.spark.scheduler.Task.run(Task.scala:51)&lt;BR /&gt;at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:187)&lt;BR /&gt;at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)&lt;BR /&gt;at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)&lt;BR /&gt;at java.lang.Thread.run(Thread.java:745)&lt;BR /&gt;&lt;BR /&gt;Please help!&lt;/P&gt;</description>
      <pubDate>Fri, 16 Sep 2022 09:06:23 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Run-Spark-App-Error/m-p/17870#M2742</guid>
      <dc:creator>mikestl</dc:creator>
      <dc:date>2022-09-16T09:06:23Z</dc:date>
    </item>
    <item>
      <title>Re: Run Spark App Error</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Run-Spark-App-Error/m-p/17884#M2743</link>
      <description>&lt;P&gt;This is a conflict between the version of Guava that Spark uses and the version used by Hadoop. How are you packaging your app? And can you run it with spark-submit? Using spark-submit tends to take care of this conflict.&lt;/P&gt;</description>
      <pubDate>Fri, 29 Aug 2014 22:54:25 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Run-Spark-App-Error/m-p/17884#M2743</guid>
      <dc:creator>srowen</dc:creator>
      <dc:date>2014-08-29T22:54:25Z</dc:date>
    </item>
    <item>
      <title>Re: Run Spark App Error</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Run-Spark-App-Error/m-p/19716#M2744</link>
      <description>&lt;P&gt;I am getting the same error when launching a Spark job through Oozie using a Java action. Any update on how to resolve this?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks.&lt;/P&gt;</description>
      <pubDate>Fri, 03 Oct 2014 22:28:13 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Run-Spark-App-Error/m-p/19716#M2744</guid>
      <dc:creator>ctchiu888</dc:creator>
      <dc:date>2014-10-03T22:28:13Z</dc:date>
    </item>
    <item>
      <title>Re: Run Spark App Error</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Run-Spark-App-Error/m-p/19774#M2745</link>
      <description>&lt;P&gt;Solved this by defining the following property in workflow.xml.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;DIV&gt;&amp;lt;configuration&amp;gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;lt;property&amp;gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;lt;name&amp;gt;oozie.launcher.mapreduce.job.user.classpath.first&amp;lt;/name&amp;gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;lt;value&amp;gt;true&amp;lt;/value&amp;gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;lt;/property&amp;gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;.....&lt;/DIV&gt;&lt;DIV&gt;&amp;lt;/configuration&amp;gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;PRE&gt;&amp;nbsp;&lt;/PRE&gt;</description>
      <pubDate>Mon, 06 Oct 2014 21:10:21 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Run-Spark-App-Error/m-p/19774#M2745</guid>
      <dc:creator>ctchiu888</dc:creator>
      <dc:date>2014-10-06T21:10:21Z</dc:date>
    </item>
  </channel>
</rss>

