<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Spark Streaming job not reading data from Kafka in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Spark-Streaming-job-not-reading-data-from-Kafka/m-p/348242#M235345</link>
    <description>&lt;P&gt;Hii somant,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;there are some information that need to be provided in order to drive the investigation:&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;From where are you launching the job? E.g. from a gateway of your CDH cluster?&lt;/LI&gt;&lt;LI&gt;Can you please share your spark-submit command?&lt;/LI&gt;&lt;LI&gt;You are saying the job is not starting up, do you have any log (Spark driver logs, YARN logs)?&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;Thanks&lt;/P&gt;</description>
    <pubDate>Wed, 20 Jul 2022 15:18:51 GMT</pubDate>
    <dc:creator>amallegni</dc:creator>
    <dc:date>2022-07-20T15:18:51Z</dc:date>
    <item>
      <title>Spark Streaming job not reading data from Kafka</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Spark-Streaming-job-not-reading-data-from-Kafka/m-p/345742#M234628</link>
      <description>&lt;P&gt;Hi,&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I am trying to run spark streaming job on cloudera v&lt;SPAN&gt;5.14.4 which will be reading data from kafka v0.10. I am using following version of spark streaming dependencies&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;spark-streaming_2.11:2.3.0&lt;BR /&gt;spark-streaming-kafka-0-10_2.11:2.3.0&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;The job works fine on local but on cloudera it is not starting up. There is no streaming tab on AM and stages/storage tabs are blank.&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="somant_0-1655367352711.png" style="width: 400px;"&gt;&lt;img src="https://community.cloudera.com/t5/image/serverpage/image-id/34608i40C5B77BD9257A18/image-size/medium?v=v2&amp;amp;px=400" role="button" title="somant_0-1655367352711.png" alt="somant_0-1655367352711.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;There are no errors in the logs. Any suggestion what might be wrong here?&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 16 Jun 2022 07:55:09 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Spark-Streaming-job-not-reading-data-from-Kafka/m-p/345742#M234628</guid>
      <dc:creator>somant</dc:creator>
      <dc:date>2022-06-16T07:55:09Z</dc:date>
    </item>
    <item>
      <title>Re: Spark Streaming job not reading data from Kafka</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Spark-Streaming-job-not-reading-data-from-Kafka/m-p/348242#M235345</link>
      <description>&lt;P&gt;Hii somant,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;there are some information that need to be provided in order to drive the investigation:&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;From where are you launching the job? E.g. from a gateway of your CDH cluster?&lt;/LI&gt;&lt;LI&gt;Can you please share your spark-submit command?&lt;/LI&gt;&lt;LI&gt;You are saying the job is not starting up, do you have any log (Spark driver logs, YARN logs)?&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;Thanks&lt;/P&gt;</description>
      <pubDate>Wed, 20 Jul 2022 15:18:51 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Spark-Streaming-job-not-reading-data-from-Kafka/m-p/348242#M235345</guid>
      <dc:creator>amallegni</dc:creator>
      <dc:date>2022-07-20T15:18:51Z</dc:date>
    </item>
    <item>
      <title>Re: Spark Streaming job not reading data from Kafka</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Spark-Streaming-job-not-reading-data-from-Kafka/m-p/348895#M235476</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/83846"&gt;@amallegni&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;-I am launching the jobs from one of the cluster nodes&lt;/P&gt;&lt;P&gt;-SPARK_KAFKA_VERSION=0.10 spark2-submit --master yarn --deploy-mode cluster --name appName --keytab "****.keytab" --principal "****" --num-executors 3 --driver-memory 4g --executor-memory 3g --executor-cores 3 --files "/path/to/hive-site.xml,/path/to/jaas.conf#jaas.conf" --conf "spark.streaming.unpersist=true" --conf "spark.executor.userClassPathFirst=true" --conf "spark.streaming.kafka.maxRatePerPartition=10" --conf "spark.streaming.kafka.maxRetries=5" --conf "spark.streaming.backpressure.enabled=true" --conf "spark.executor.extraJavaOptions=-XX:+UseG1GC -Djava.security.krb5.conf=/etc/krb5.conf -Djava.security.auth.login.config=./jaas.conf" --conf "spark.driver.extraJavaOptions=-XX:+UseG1GC -Djava.security.krb5.conf=/etc/krb5.conf -Djava.security.auth.login.config=./jaas.conf" --conf spark.yarn.maxAppAttempts=4 --conf spark.yarn.am.attemptFailuresValidityInterval=1h --conf spark.yarn.max.executor.failures=24 --conf spark.yarn.executor.failuresValidityInterval=1h --properties-file /path/to/app.properties&lt;/P&gt;&lt;P&gt;-I am not finding anything in logs.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The only similar issue I found out was this one&amp;nbsp;&lt;A href="https://stackoverflow.com/questions/44454520/spark-streaming-is-not-streaming-but-waits-showing-consumer-config-values" target="_blank" rel="noopener noreferrer"&gt;https://stackoverflow.com/questions/44454520/spark-streaming-is-not-streaming-but-waits-showing-consumer-config-values&lt;/A&gt;&lt;SPAN&gt;. 
I also have a kafka installation version 0.9 and my job needs 0.10 I'm wondering if there is any way that the spark job doesn't depend on the CDH provided libraries and only use packaged dependencies.&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 27 Jul 2022 13:36:33 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Spark-Streaming-job-not-reading-data-from-Kafka/m-p/348895#M235476</guid>
      <dc:creator>somant</dc:creator>
      <dc:date>2022-07-27T13:36:33Z</dc:date>
    </item>
    <item>
      <title>Re: Spark Streaming job not reading data from Kafka</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Spark-Streaming-job-not-reading-data-from-Kafka/m-p/349275#M235589</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/98611"&gt;@somant&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;regarding this " I'm wondering if there is any way that the spark job doesn't depend on the CDH provided libraries and only use packaged dependencies", it depends on the library you are referring to.&lt;/P&gt;&lt;P&gt;If you are referring to kafka libraries, they shouldn't be loaded by spark classpath by default, hence you can prepare a fat jar including the kafka dependencies choosing the version you need.&lt;/P&gt;&lt;P&gt;Moreover, did you try by specifying less options at launch time?&amp;nbsp;&lt;BR /&gt;E.g. I would start by removing the usage of G1GC and other advanced options, monitoring the behaviour one step at a time.&lt;/P&gt;</description>
      <pubDate>Mon, 01 Aug 2022 15:03:11 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Spark-Streaming-job-not-reading-data-from-Kafka/m-p/349275#M235589</guid>
      <dc:creator>amallegni</dc:creator>
      <dc:date>2022-08-01T15:03:11Z</dc:date>
    </item>
    <item>
      <title>Re: Spark Streaming job not reading data from Kafka</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Spark-Streaming-job-not-reading-data-from-Kafka/m-p/349298#M235600</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/83846"&gt;@amallegni&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks for replying. I created a simple word count job and I'm running it using this command&amp;nbsp;&lt;/P&gt;&lt;P&gt;SPARK_KAFKA_VERSION=0.10 /opt/cloudera/parcels/SPARK2/bin/spark2-submit --num-executors 2 --master local[4] --driver-java-options "-Djava.security.auth.login.config=/apps/raiot/dev/confluent/config/jaas.conf" --class com.cloudera.spark.examples.DirectKafkaWordCount --conf "spark.executor.extraJavaOptions=-Djava.security.auth.login.config=/apps/raiot/dev/confluent/config/jaas.conf" spark-dstream-secure-kafka-app-1.0-SNAPSHOT-jar-with-dependencies.jar&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Here are the logs&lt;/P&gt;&lt;P&gt;22/08/02 00:53:18 INFO spark.SparkContext: Running Spark version 2.3.0.cloudera3&lt;BR /&gt;22/08/02 00:53:18 INFO spark.SparkContext: Submitted application: DirectKafkaWordCount&lt;BR /&gt;22/08/02 00:53:18 INFO spark.SecurityManager: Changing view acls to:&lt;BR /&gt;22/08/02 00:53:18 INFO spark.SecurityManager: Changing modify acls to:&lt;BR /&gt;22/08/02 00:53:18 INFO spark.SecurityManager: Changing view acls groups to:&lt;BR /&gt;22/08/02 00:53:18 INFO spark.SecurityManager: Changing modify acls groups to:&lt;BR /&gt;22/08/02 00:53:18 INFO spark.SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(user, loaddev); groups with view permissions: Set(); users with modify permissions: Set(skumar116, loaddev); groups with modify permissions: Set()&lt;BR /&gt;22/08/02 00:53:18 INFO util.Utils: Successfully started service 'sparkDriver' on port 33638.&lt;BR /&gt;22/08/02 00:53:19 INFO spark.SparkEnv: Registering MapOutputTracker&lt;BR /&gt;22/08/02 00:53:19 INFO spark.SparkEnv: Registering BlockManagerMaster&lt;BR /&gt;22/08/02 00:53:19 INFO storage.BlockManagerMasterEndpoint: Using 
org.apache.spark.storage.DefaultTopologyMapper for getting topology information&lt;BR /&gt;22/08/02 00:53:19 INFO storage.BlockManagerMasterEndpoint: BlockManagerMasterEndpoint up&lt;BR /&gt;22/08/02 00:53:19 INFO storage.DiskBlockManager: Created local directory at /tmp/blockmgr-1811ccb1-0aa0-489c-aec0-e771d23f4a88&lt;BR /&gt;22/08/02 00:53:19 INFO memory.MemoryStore: MemoryStore started with capacity 12.3 GB&lt;BR /&gt;22/08/02 00:53:19 INFO spark.SparkEnv: Registering OutputCommitCoordinator&lt;BR /&gt;22/08/02 00:53:19 INFO util.log: Logging initialized @2986ms&lt;BR /&gt;22/08/02 00:53:19 INFO server.Server: jetty-9.3.z-SNAPSHOT&lt;BR /&gt;22/08/02 00:53:19 INFO server.Server: Started @3102ms&lt;BR /&gt;22/08/02 00:53:19 INFO server.AbstractConnector: Started ServerConnector@1804f60d{HTTP/1.1,[http/1.1]}{0.0.0.0:4040}&lt;BR /&gt;22/08/02 00:53:19 INFO util.Utils: Successfully started service 'SparkUI' on port 4040.&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@512d92b{/jobs,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@3c321bdb{/jobs/json,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@3abd581e{/jobs/job,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@6f0628de{/jobs/job/json,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@1e392345{/stages,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@4ced35ed{/stages/json,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@7bd69e82{/stages/stage,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started 
o.s.j.s.ServletContextHandler@27dc79f7{/stages/stage/json,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@3aaf4f07{/stages/pool,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@18e8473e{/stages/pool/json,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@1a38ba58{/storage,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@6058e535{/storage/json,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@1deb2c43{/storage/rdd,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@1cefc4b3{/storage/rdd/json,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@6f6a7463{/environment,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@79f227a9{/environment/json,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@50d68830{/executors,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@7674a051{/executors/json,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@6754ef00{/executors/threadDump,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@323e8306{/executors/threadDump/json,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@4acf72b6{/static,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO 
handler.ContextHandler: Started o.s.j.s.ServletContextHandler@7dd712e8{/,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@22ee2d0{/api,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@4b770e40{/jobs/job/kill,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@54a3ab8f{/stages/stage/kill,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:19 INFO ui.SparkUI: Bound SparkUI to 0.0.0.0, and started at &lt;A href="http://hostname:4040" target="_blank"&gt;http://hostname:4040&lt;/A&gt;&lt;BR /&gt;22/08/02 00:53:19 INFO spark.SparkContext: Added JAR file:/apps/raiot/dev/confluent/build/spark-dstream-secure-kafka-app-1.0-SNAPSHOT-jar-with-dependencies.jar at spark://hostname:33638/jars/spark-dstream-secure-kafka-app-1.0-SNAPSHOT-jar-with-dependencies.jar with timestamp 1659401599561&lt;BR /&gt;22/08/02 00:53:19 INFO executor.Executor: Starting executor ID driver on host localhost&lt;BR /&gt;22/08/02 00:53:19 INFO util.Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 43751.&lt;BR /&gt;22/08/02 00:53:19 INFO netty.NettyBlockTransferService: Server created on hostname:43751&lt;BR /&gt;22/08/02 00:53:19 INFO storage.BlockManager: Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy&lt;BR /&gt;22/08/02 00:53:19 INFO storage.BlockManagerMaster: Registering BlockManager BlockManagerId(driver, hostname, 43751, None)&lt;BR /&gt;22/08/02 00:53:19 INFO storage.BlockManagerMasterEndpoint: Registering block manager hostname:43751 with 12.3 GB RAM, BlockManagerId(driver, hostname, 43751, None)&lt;BR /&gt;22/08/02 00:53:19 INFO storage.BlockManagerMaster: Registered BlockManager BlockManagerId(driver, hostname, 43751, None)&lt;BR /&gt;22/08/02 00:53:19 INFO storage.BlockManager: external shuffle 
service port = 7337&lt;BR /&gt;22/08/02 00:53:19 INFO storage.BlockManager: Initialized BlockManager: BlockManagerId(driver, hostname, 43751, None)&lt;BR /&gt;22/08/02 00:53:19 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@340b7ef6{/metrics/json,null,AVAILABLE,@Spark}&lt;BR /&gt;22/08/02 00:53:21 INFO scheduler.EventLoggingListener: Logging events to hdfs://nameservice1/user/spark/spark2ApplicationHistory/local-1659401599608&lt;BR /&gt;22/08/02 00:53:21 INFO spark.SparkContext: Registered listener com.cloudera.spark.lineage.NavigatorAppListener&lt;BR /&gt;22/08/02 00:53:22 WARN kafka010.KafkaUtils: overriding enable.auto.commit to false for executor&lt;BR /&gt;22/08/02 00:53:22 WARN kafka010.KafkaUtils: overriding auto.offset.reset to none for executor&lt;BR /&gt;22/08/02 00:53:22 WARN kafka010.KafkaUtils: overriding executor group.id to spark-executor-db_c1_parser_pap_dev&lt;BR /&gt;22/08/02 00:53:22 WARN kafka010.KafkaUtils: overriding receive.buffer.bytes to 65536 see KAFKA-3135&lt;BR /&gt;22/08/02 00:53:22 INFO kafka010.DirectKafkaInputDStream: Slide time = 20000 ms&lt;BR /&gt;22/08/02 00:53:22 INFO kafka010.DirectKafkaInputDStream: Storage level = Serialized 1x Replicated&lt;BR /&gt;22/08/02 00:53:22 INFO kafka010.DirectKafkaInputDStream: Checkpoint interval = null&lt;BR /&gt;22/08/02 00:53:22 INFO kafka010.DirectKafkaInputDStream: Remember interval = 20000 ms&lt;BR /&gt;22/08/02 00:53:22 INFO kafka010.DirectKafkaInputDStream: Initialized and validated org.apache.spark.streaming.kafka010.DirectKafkaInputDStream@19661827&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.MappedDStream: Slide time = 20000 ms&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.MappedDStream: Storage level = Serialized 1x Replicated&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.MappedDStream: Checkpoint interval = null&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.MappedDStream: Remember interval = 20000 ms&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.MappedDStream: Initialized and 
validated org.apache.spark.streaming.dstream.MappedDStream@40afb4d2&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.FlatMappedDStream: Slide time = 20000 ms&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.FlatMappedDStream: Storage level = Serialized 1x Replicated&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.FlatMappedDStream: Checkpoint interval = null&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.FlatMappedDStream: Remember interval = 20000 ms&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.FlatMappedDStream: Initialized and validated org.apache.spark.streaming.dstream.FlatMappedDStream@41a0f437&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.MappedDStream: Slide time = 20000 ms&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.MappedDStream: Storage level = Serialized 1x Replicated&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.MappedDStream: Checkpoint interval = null&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.MappedDStream: Remember interval = 20000 ms&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.MappedDStream: Initialized and validated org.apache.spark.streaming.dstream.MappedDStream@63e6273b&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.ShuffledDStream: Slide time = 20000 ms&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.ShuffledDStream: Storage level = Serialized 1x Replicated&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.ShuffledDStream: Checkpoint interval = null&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.ShuffledDStream: Remember interval = 20000 ms&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.ShuffledDStream: Initialized and validated org.apache.spark.streaming.dstream.ShuffledDStream@4a68ac40&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.ForEachDStream: Slide time = 20000 ms&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.ForEachDStream: Storage level = Serialized 1x Replicated&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.ForEachDStream: Checkpoint interval = null&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.ForEachDStream: Remember interval = 20000 ms&lt;BR /&gt;22/08/02 00:53:22 INFO dstream.ForEachDStream: Initialized and 
validated org.apache.spark.streaming.dstream.ForEachDStream@77dc88fb&lt;BR /&gt;22/08/02 00:53:22 INFO consumer.ConsumerConfig: ConsumerConfig values:&lt;BR /&gt;metric.reporters = []&lt;BR /&gt;metadata.max.age.ms = 300000&lt;BR /&gt;partition.assignment.strategy = [org.apache.kafka.clients.consumer.RangeAssignor]&lt;BR /&gt;reconnect.backoff.ms = 50&lt;BR /&gt;sasl.kerberos.ticket.renew.window.factor = 0.8&lt;BR /&gt;max.partition.fetch.bytes = 1048576&lt;BR /&gt;bootstrap.servers = [bootstrap-servers]&lt;BR /&gt;ssl.keystore.type = JKS&lt;BR /&gt;enable.auto.commit = false&lt;BR /&gt;sasl.mechanism = PLAIN&lt;BR /&gt;interceptor.classes = null&lt;BR /&gt;exclude.internal.topics = true&lt;BR /&gt;ssl.truststore.password = null&lt;BR /&gt;client.id =&lt;BR /&gt;ssl.endpoint.identification.algorithm = null&lt;BR /&gt;max.poll.records = 2147483647&lt;BR /&gt;check.crcs = true&lt;BR /&gt;request.timeout.ms = 40000&lt;BR /&gt;heartbeat.interval.ms = 3000&lt;BR /&gt;auto.commit.interval.ms = 5000&lt;BR /&gt;receive.buffer.bytes = 65536&lt;BR /&gt;ssl.truststore.type = JKS&lt;BR /&gt;ssl.truststore.location = null&lt;BR /&gt;ssl.keystore.password = null&lt;BR /&gt;fetch.min.bytes = 1&lt;BR /&gt;send.buffer.bytes = 131072&lt;BR /&gt;value.deserializer = class org.apache.kafka.common.serialization.StringDeserializer&lt;BR /&gt;group.id = db_c1_parser_pap_dev&lt;BR /&gt;retry.backoff.ms = 100&lt;BR /&gt;ssl.secure.random.implementation = null&lt;BR /&gt;sasl.kerberos.kinit.cmd = /usr/bin/kinit&lt;BR /&gt;sasl.kerberos.service.name = null&lt;BR /&gt;sasl.kerberos.ticket.renew.jitter = 0.05&lt;BR /&gt;ssl.trustmanager.algorithm = PKIX&lt;BR /&gt;ssl.key.password = null&lt;BR /&gt;fetch.max.wait.ms = 500&lt;BR /&gt;sasl.kerberos.min.time.before.relogin = 60000&lt;BR /&gt;connections.max.idle.ms = 540000&lt;BR /&gt;session.timeout.ms = 30000&lt;BR /&gt;metrics.num.samples = 2&lt;BR /&gt;key.deserializer = class org.apache.kafka.common.serialization.StringDeserializer&lt;BR 
/&gt;ssl.protocol = TLS&lt;BR /&gt;ssl.provider = null&lt;BR /&gt;ssl.enabled.protocols = [TLSv1.2, TLSv1.1, TLSv1]&lt;BR /&gt;ssl.keystore.location = null&lt;BR /&gt;ssl.cipher.suites = null&lt;BR /&gt;security.protocol = SASL_SSL&lt;BR /&gt;ssl.keymanager.algorithm = SunX509&lt;BR /&gt;metrics.sample.window.ms = 30000&lt;BR /&gt;auto.offset.reset = earliest&lt;/P&gt;&lt;P&gt;22/08/02 00:53:22 INFO authenticator.AbstractLogin: Successfully logged in.&lt;BR /&gt;22/08/02 00:53:22 INFO consumer.ConsumerConfig: ConsumerConfig values:&lt;BR /&gt;metric.reporters = []&lt;BR /&gt;metadata.max.age.ms = 300000&lt;BR /&gt;partition.assignment.strategy = [org.apache.kafka.clients.consumer.RangeAssignor]&lt;BR /&gt;reconnect.backoff.ms = 50&lt;BR /&gt;sasl.kerberos.ticket.renew.window.factor = 0.8&lt;BR /&gt;max.partition.fetch.bytes = 1048576&lt;BR /&gt;bootstrap.servers = [bootstrap-servers]&lt;BR /&gt;ssl.keystore.type = JKS&lt;BR /&gt;enable.auto.commit = false&lt;BR /&gt;sasl.mechanism = PLAIN&lt;BR /&gt;interceptor.classes = null&lt;BR /&gt;exclude.internal.topics = true&lt;BR /&gt;ssl.truststore.password = null&lt;BR /&gt;client.id = consumer-1&lt;BR /&gt;ssl.endpoint.identification.algorithm = null&lt;BR /&gt;max.poll.records = 2147483647&lt;BR /&gt;check.crcs = true&lt;BR /&gt;request.timeout.ms = 40000&lt;BR /&gt;heartbeat.interval.ms = 3000&lt;BR /&gt;auto.commit.interval.ms = 5000&lt;BR /&gt;receive.buffer.bytes = 65536&lt;BR /&gt;ssl.truststore.type = JKS&lt;BR /&gt;ssl.truststore.location = null&lt;BR /&gt;ssl.keystore.password = null&lt;BR /&gt;fetch.min.bytes = 1&lt;BR /&gt;send.buffer.bytes = 131072&lt;BR /&gt;value.deserializer = class org.apache.kafka.common.serialization.StringDeserializer&lt;BR /&gt;group.id = db_c1_parser_pap_dev&lt;BR /&gt;retry.backoff.ms = 100&lt;BR /&gt;ssl.secure.random.implementation = null&lt;BR /&gt;sasl.kerberos.kinit.cmd = /usr/bin/kinit&lt;BR /&gt;sasl.kerberos.service.name = null&lt;BR /&gt;sasl.kerberos.ticket.renew.jitter 
= 0.05&lt;BR /&gt;ssl.trustmanager.algorithm = PKIX&lt;BR /&gt;ssl.key.password = null&lt;BR /&gt;fetch.max.wait.ms = 500&lt;BR /&gt;sasl.kerberos.min.time.before.relogin = 60000&lt;BR /&gt;connections.max.idle.ms = 540000&lt;BR /&gt;session.timeout.ms = 30000&lt;BR /&gt;metrics.num.samples = 2&lt;BR /&gt;key.deserializer = class org.apache.kafka.common.serialization.StringDeserializer&lt;BR /&gt;ssl.protocol = TLS&lt;BR /&gt;ssl.provider = null&lt;BR /&gt;ssl.enabled.protocols = [TLSv1.2, TLSv1.1, TLSv1]&lt;BR /&gt;ssl.keystore.location = null&lt;BR /&gt;ssl.cipher.suites = null&lt;BR /&gt;security.protocol = SASL_SSL&lt;BR /&gt;ssl.keymanager.algorithm = SunX509&lt;BR /&gt;metrics.sample.window.ms = 30000&lt;BR /&gt;auto.offset.reset = earliest&lt;/P&gt;&lt;P&gt;22/08/02 00:53:22 INFO utils.AppInfoParser: Kafka version : 0.10.0-kafka-2.1.0&lt;BR /&gt;22/08/02 00:53:22 INFO utils.AppInfoParser: Kafka commitId : unknown&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I'm running it on one of the cluster nodes. The job gets stuck at this point. 
Although when I'm running it from local system it is working fine.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;spark-submit2 --num-executors 2 --master local[4] --driver-java-options "-Djava.security.auth.login.config=C:\\Projects\\LocalJobs\\jaas.conf" --class com.cloudera.spark.examples.DirectKafkaWordCount --conf "spark.executor.extraJavaOptions=-Djava.security.auth.login.config=C:\\Projects\\LocalJobs\\jaas.conf" spark-dstream-secure-kafka-app-1.0-SNAPSHOT-jar-with-dependencies.jar&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;22/08/02 06:35:09 INFO AppInfoParser: Kafka version : 0.10.0-kafka-2.1.0&lt;BR /&gt;22/08/02 06:35:09 INFO AppInfoParser: Kafka commitId : unknown&lt;BR /&gt;22/08/02 06:35:09 INFO CachedKafkaConsumer: Initial fetch for spark-executor-group_id Test101 1 1405900&lt;BR /&gt;22/08/02 06:35:11 INFO AbstractCoordinator: Discovered coordinator broker:9092 (id: 2147483643 rack: null) for group spark-executor-group_id.&lt;BR /&gt;22/08/02 06:35:13 INFO Executor: Finished task 1.0 in stage 0.0 (TID 1). 1036 bytes result sent to driver&lt;BR /&gt;22/08/02 06:35:13 INFO TaskSetManager: Starting task 2.0 in stage 0.0 (TID 2, localhost, executor driver, partition 2, PROCESS_LOCAL, 7722 bytes)&lt;BR /&gt;22/08/02 06:35:13 INFO Executor: Running task 2.0 in stage 0.0 (TID 2)&lt;BR /&gt;22/08/02 06:35:13 INFO TaskSetManager: Finished task 1.0 in stage 0.0 (TID 1) in 3330 ms on localhost (executor driver) (2/3)&lt;BR /&gt;22/08/02 06:35:13 INFO KafkaRDD: Beginning offset 1403127 is the same as ending offset skipping Test101 0&lt;BR /&gt;22/08/02 06:35:13 INFO Executor: Finished task 2.0 in stage 0.0 (TID 2). 
864 bytes result sent to driver&lt;BR /&gt;22/08/02 06:35:13 INFO TaskSetManager: Finished task 2.0 in stage 0.0 (TID 2) in 11 ms on localhost (executor driver) (3/3)&lt;BR /&gt;22/08/02 06:35:13 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool&lt;BR /&gt;22/08/02 06:35:13 INFO DAGScheduler: ShuffleMapStage 0 (map at DirectKafkaWordCount.scala:63) finished in 3.517 s&lt;BR /&gt;22/08/02 06:35:13 INFO DAGScheduler: looking for newly runnable stages&lt;BR /&gt;22/08/02 06:35:13 INFO DAGScheduler: running: Set()&lt;BR /&gt;22/08/02 06:35:13 INFO DAGScheduler: waiting: Set(ResultStage 1)&lt;BR /&gt;22/08/02 06:35:13 INFO DAGScheduler: failed: Set()&lt;BR /&gt;22/08/02 06:35:13 INFO DAGScheduler: Submitting ResultStage 1 (ShuffledRDD[4] at reduceByKey at DirectKafkaWordCount.scala:63), which has no missing parents&lt;BR /&gt;22/08/02 06:35:13 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 2.8 KB, free 1944.9 MB)&lt;BR /&gt;22/08/02 06:35:13 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 1732.0 B, free 1944.9 MB)&lt;BR /&gt;22/08/02 06:35:13 INFO BlockManagerInfo: Added broadcast_1_piece0 in memory on LAPTOP-TFO6GTNG.bbrouter:61776 (size: 1732.0 B, free: 1944.9 MB)&lt;BR /&gt;22/08/02 06:35:13 INFO SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:1039&lt;BR /&gt;22/08/02 06:35:13 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 1 (ShuffledRDD[4] at reduceByKey at DirectKafkaWordCount.scala:63) (first 15 tasks are for partitions Vector(0))&lt;BR /&gt;22/08/02 06:35:13 INFO TaskSchedulerImpl: Adding task set 1.0 with 1 tasks&lt;BR /&gt;22/08/02 06:35:13 INFO TaskSetManager: Starting task 0.0 in stage 1.0 (TID 3, localhost, executor driver, partition 0, ANY, 7649 bytes)&lt;BR /&gt;22/08/02 06:35:13 INFO Executor: Running task 0.0 in stage 1.0 (TID 3)&lt;BR /&gt;22/08/02 06:35:13 INFO ShuffleBlockFetcherIterator: Getting 1 non-empty 
blocks out of 3 blocks&lt;BR /&gt;22/08/02 06:35:13 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 4 ms&lt;BR /&gt;22/08/02 06:35:13 INFO Executor: Finished task 0.0 in stage 1.0 (TID 3). 1420 bytes result sent to driver&lt;BR /&gt;22/08/02 06:35:13 INFO TaskSetManager: Finished task 0.0 in stage 1.0 (TID 3) in 31 ms on localhost (executor driver) (1/1)&lt;BR /&gt;22/08/02 06:35:13 INFO TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool&lt;BR /&gt;22/08/02 06:35:13 INFO DAGScheduler: ResultStage 1 (print at DirectKafkaWordCount.scala:64) finished in 0.038 s&lt;BR /&gt;22/08/02 06:35:13 INFO DAGScheduler: Job 0 finished: print at DirectKafkaWordCount.scala:64, took 3.615580 s&lt;BR /&gt;22/08/02 06:35:13 INFO JobScheduler: Finished job streaming job 1659402300000 ms.0 from job set of time 1659402300000 ms&lt;BR /&gt;22/08/02 06:35:13 INFO JobScheduler: Total delay: 13.190 s for time 1659402300000 ms (execution: 3.643 s)&lt;BR /&gt;22/08/02 06:35:13 INFO ReceivedBlockTracker: Deleting batches:&lt;BR /&gt;22/08/02 06:35:13 INFO InputInfoTracker: remove old batch metadata:&lt;BR /&gt;-------------------------------------------&lt;BR /&gt;Time: 1659402300000 ms&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I'm not sure what might be the issue when I'm trying to run it on cloudera nodes. Any help is appreciated.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 02 Aug 2022 01:08:15 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Spark-Streaming-job-not-reading-data-from-Kafka/m-p/349298#M235600</guid>
      <dc:creator>somant</dc:creator>
      <dc:date>2022-08-02T01:08:15Z</dc:date>
    </item>
    <item>
      <title>Re: Spark Streaming job not reading data from Kafka</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Spark-Streaming-job-not-reading-data-from-Kafka/m-p/351138#M236164</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/98611"&gt;@somant&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Please don't use open source libraries and use cluster-supported spark/kafka versions.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Check the following example code:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;A href="https://community.cloudera.com/t5/Community-Articles/Running-DirectKafkaWordCount-example-in-CDP/ta-p/340402" target="_blank"&gt;https://community.cloudera.com/t5/Community-Articles/Running-DirectKafkaWordCount-example-in-CDP/ta-p/340402&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 30 Aug 2022 11:31:20 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Spark-Streaming-job-not-reading-data-from-Kafka/m-p/351138#M236164</guid>
      <dc:creator>RangaReddy</dc:creator>
      <dc:date>2022-08-30T11:31:20Z</dc:date>
    </item>
  </channel>
</rss>

