<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: can someone point me to a good tutorial on spark streaming to use with kafka in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/can-someone-point-me-to-a-good-tutorial-on-spark-streaming/m-p/132527#M31482</link>
    <description>&lt;P&gt;nice article..&lt;/P&gt;</description>
    <pubDate>Fri, 10 Jun 2016 18:22:03 GMT</pubDate>
    <dc:creator>rajkumar_singh</dc:creator>
    <dc:date>2016-06-10T18:22:03Z</dc:date>
    <item>
      <title>can someone point me to a good tutorial on spark streaming to use with kafka</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/can-someone-point-me-to-a-good-tutorial-on-spark-streaming/m-p/132523#M31478</link>
      <description>&lt;P&gt;I am trying to fetch JSON-format data from Kafka through Spark Streaming and want to create a temp table in Spark to query the JSON data like a normal table.&lt;/P&gt;&lt;P&gt;I tried several tutorials available on the internet but didn't succeed. I am able to read a text file from HDFS and process it through Spark, but I am stuck using JSON data from Kafka.&lt;/P&gt;&lt;P&gt;Can somebody guide me on this?&lt;/P&gt;</description>
      <pubDate>Fri, 10 Jun 2016 17:18:19 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/can-someone-point-me-to-a-good-tutorial-on-spark-streaming/m-p/132523#M31478</guid>
      <dc:creator>tajinderdhanjal</dc:creator>
      <dc:date>2016-06-10T17:18:19Z</dc:date>
    </item>
    <item>
      <title>Re: can someone point me to a good tutorial on spark streaming to use with kafka</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/can-someone-point-me-to-a-good-tutorial-on-spark-streaming/m-p/132524#M31479</link>
      <description>&lt;P&gt;Could you please try this &lt;/P&gt;&lt;P&gt;&lt;A href="http://blog.jaceklaskowski.pl/2015/07/20/real-time-data-processing-using-apache-kafka-and-spark-streaming.html" target="_blank"&gt;http://blog.jaceklaskowski.pl/2015/07/20/real-time-data-processing-using-apache-kafka-and-spark-streaming.html&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 10 Jun 2016 17:22:58 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/can-someone-point-me-to-a-good-tutorial-on-spark-streaming/m-p/132524#M31479</guid>
      <dc:creator>rajkumar_singh</dc:creator>
      <dc:date>2016-06-10T17:22:58Z</dc:date>
    </item>
    <item>
      <title>Re: can someone point me to a good tutorial on spark streaming to use with kafka</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/can-someone-point-me-to-a-good-tutorial-on-spark-streaming/m-p/132525#M31480</link>
      <description>&lt;A rel="user" href="https://community.cloudera.com/users/3719/tajinderdhanjal93.html" nodeid="3719"&gt;@Tajinderpal Singh&lt;/A&gt;&lt;P&gt;You can refer to the Spark documentation below:&lt;/P&gt;&lt;P&gt;&lt;A href="http://spark.apache.org/docs/latest/streaming-kafka-integration.html" target="_blank"&gt;http://spark.apache.org/docs/latest/streaming-kafka-integration.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;Thanks and Regards,&lt;/P&gt;&lt;P&gt;Sindhu&lt;/P&gt;</description>
      <pubDate>Fri, 10 Jun 2016 17:39:55 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/can-someone-point-me-to-a-good-tutorial-on-spark-streaming/m-p/132525#M31480</guid>
      <dc:creator>ssubhas</dc:creator>
      <dc:date>2016-06-10T17:39:55Z</dc:date>
    </item>
    <item>
      <title>Re: can someone point me to a good tutorial on spark streaming to use with kafka</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/can-someone-point-me-to-a-good-tutorial-on-spark-streaming/m-p/132526#M31481</link>
      <description>&lt;P&gt;And shameless plug:&lt;/P&gt;&lt;P&gt;&lt;A href="https://community.hortonworks.com/content/kbentry/25726/spark-streaming-explained-kafka-to-phoenix.html" target="_blank"&gt;https://community.hortonworks.com/content/kbentry/25726/spark-streaming-explained-kafka-to-phoenix.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;You can have a look at the parser class I wrote. You would need to write something similar that parses your JSON object and returns a Java/Scala object that you can then use in your analytics&lt;/P&gt;</description>
      <pubDate>Fri, 10 Jun 2016 17:53:32 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/can-someone-point-me-to-a-good-tutorial-on-spark-streaming/m-p/132526#M31481</guid>
      <dc:creator>bleonhardi</dc:creator>
      <dc:date>2016-06-10T17:53:32Z</dc:date>
    </item>
    <item>
      <title>Re: can someone point me to a good tutorial on spark streaming to use with kafka</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/can-someone-point-me-to-a-good-tutorial-on-spark-streaming/m-p/132527#M31482</link>
      <description>&lt;P&gt;nice article..&lt;/P&gt;</description>
      <pubDate>Fri, 10 Jun 2016 18:22:03 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/can-someone-point-me-to-a-good-tutorial-on-spark-streaming/m-p/132527#M31482</guid>
      <dc:creator>rajkumar_singh</dc:creator>
      <dc:date>2016-06-10T18:22:03Z</dc:date>
    </item>
    <item>
      <title>Re: can someone point me to a good tutorial on spark streaming to use with kafka</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/can-someone-point-me-to-a-good-tutorial-on-spark-streaming/m-p/132528#M31483</link>
      <description>&lt;P&gt;Thanks a lot &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 10 Jun 2016 21:59:28 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/can-someone-point-me-to-a-good-tutorial-on-spark-streaming/m-p/132528#M31483</guid>
      <dc:creator>bleonhardi</dc:creator>
      <dc:date>2016-06-10T21:59:28Z</dc:date>
    </item>
    <item>
      <title>Re: can someone point me to a good tutorial on spark streaming to use with kafka</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/can-someone-point-me-to-a-good-tutorial-on-spark-streaming/m-p/132529#M31484</link>
      <description>&lt;P&gt;I have an annotated Scala example here:   &lt;A href="https://community.hortonworks.com/articles/33275/receiving-avro-messages-through-kafka-in-a-spark-s.html" target="_blank"&gt;https://community.hortonworks.com/articles/33275/receiving-avro-messages-through-kafka-in-a-spark-s.html&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 10 Jun 2016 22:14:05 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/can-someone-point-me-to-a-good-tutorial-on-spark-streaming/m-p/132529#M31484</guid>
      <dc:creator>TimothySpann</dc:creator>
      <dc:date>2016-06-10T22:14:05Z</dc:date>
    </item>
    <item>
      <title>Re: can someone point me to a good tutorial on spark streaming to use with kafka</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/can-someone-point-me-to-a-good-tutorial-on-spark-streaming/m-p/132530#M31485</link>
      <description>&lt;PRE&gt;I have created a kafka producer --

from kafka import KafkaProducer
import json,time


userdata={
        "ipaddress": "172.16.0.57",
        "logtype": "",
        "mid": "",
        "newsession": "4917279149950184029a78e4a-e694-438f-b994-39897e346953",
        "previousurl": "/",
        "searchtext": "",
        "sessionid": "29a78e4a-e694-438f-b994-39897e346953",
        "source": "desktop",
        "uid": "Chrome4929a78e4a-e694-438f-b994-39897e346953",
        "url": "http://172.16.0.57/",
        "useragent": "Mozilla/5.0%20(Windows%20NT%2010.0",
        "utmsocial": "null",
        "utmsource": "null",
        "createdtime": "2016-05-03 12:27:38",
        "latency": 13260.0,
        "serviceurl": "http://localhost:8080/Business-Web/services/product/getBestDealNew",
        "domainlayeripaddress": "localhost",
        "name":"TJ"
}


producer = KafkaProducer(bootstrap_servers=['172.16.10.13:6667','172.16.10.14:6667'],value_serializer=lambda v: json.dumps(v).encode('utf-8'))
for i in range(10):
    print("adding",i)
    producer.send('event', userdata)
    #if i &amp;lt; 10:
     #   producer.send('event', '\n')
    time.sleep(3)


&lt;/PRE&gt;&lt;P&gt;And here is the Python code to consume the JSON data from Kafka. I run this Python code like this:&lt;/P&gt;&lt;P&gt;spark-submit --jars /usr/hdp/2.3.4.7-4/spark/lib/spark-assembly-1.5.2.2.3.4.7-4-hadoop2.7.1.2.3.4.7-4.jar,/usr/hdp/2.3.4.7-4/spark/lib/spark-streaming-kafka-assembly_2.10-1.6.1.jar /home/hadoop/tajinder/clickstream_streaming.py&lt;/P&gt;&lt;PRE&gt;from pyspark.sql import SQLContext
from pyspark import SparkContext, SparkConf
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils

import json
sc = SparkContext(appName="Clickstream_kafka")
stream = StreamingContext(sc, 2)
kafka_stream = KafkaUtils.createStream(stream,"172.16.10.13:2181","raw-event-streaming-consumer",{"event":1})
parsed = kafka_stream.map(lambda (k, v): json.loads(v))
parsed.pprint()
stream.start()
stream.awaitTermination()
&lt;/PRE&gt;&lt;P&gt;I am able to receive JSON data in Spark from Kafka, but how do I convert it to an RDD or a table (schema RDD) in PySpark so that RDD operations can be applied to it?  &lt;/P&gt;</description>
      <pubDate>Sat, 11 Jun 2016 21:36:49 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/can-someone-point-me-to-a-good-tutorial-on-spark-streaming/m-p/132530#M31485</guid>
      <dc:creator>tajinderdhanjal</dc:creator>
      <dc:date>2016-06-11T21:36:49Z</dc:date>
    </item>
    <item>
      <title>Re: can someone point me to a good tutorial on spark streaming to use with kafka</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/can-someone-point-me-to-a-good-tutorial-on-spark-streaming/m-p/132531#M31486</link>
      <description>&lt;P&gt;Did you look at jsonRDD? Something like this:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;val jsonSchemaRDD = sqlContext.jsonRDD(jsons)// Pass in RDD directly
jsonSchemaRDD.registerTempTable("testjson")
sqlContext.sql("SELECT * FROM testjson where .... ").collect &lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Thu, 08 Dec 2016 11:24:51 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/can-someone-point-me-to-a-good-tutorial-on-spark-streaming/m-p/132531#M31486</guid>
      <dc:creator>jayadeep_jayara</dc:creator>
      <dc:date>2016-12-08T11:24:51Z</dc:date>
    </item>
  </channel>
</rss>

