<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question which file its producing   , JSON or AVRO ? in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/which-file-its-producing-JSON-or-AVRO/m-p/153113#M44686</link>
    <description>&lt;P&gt;With the commands below, what type of file is being produced: JSON or AVRO?&lt;/P&gt;&lt;PRE&gt;flume-ng agent --conf ./conf/ -f conf/twitter-to-hdfs.properties --name TwitterAgent  -Dflume.root.logger=WARN,console -Dtwitter4j.http.proxyHost=proxy.server.com -Dtwitter4j.http.proxyPort=8080
[flume@hadoop1 conf]$ pwd
/home/flume/conf
[flume@hadoop1 conf]$
[flume@hadoop1 conf]$ more twitter-to-hdfs.properties
########################################################
# Twitter agent for collecting Twitter data to HDFS.
########################################################
TwitterAgent.sources = Twitter
TwitterAgent.channels = MemChannel
TwitterAgent.sinks = HDFS
########################################################
# Describing and configuring the sources
########################################################
TwitterAgent.sources.Twitter.type = org.apache.flume.source.twitter.TwitterSource
TwitterAgent.sources.Twitter.Channels = MemChannel
TwitterAgent.sources.Twitter.consumerKey = xxxxxxxx
TwitterAgent.sources.Twitter.consumerSecret =xxxxxxxxxxxxxxxxxx
TwitterAgent.sources.Twitter.accessToken = xxxxxxxxxxxxxxxxxx
TwitterAgent.sources.Twitter.accessTokenSecret = xxxxxxxxxxxxxxxxxxxxxxx
TwitterAgent.sources.Twitter.Keywords = hadoop,Data Scientist,BigData,Trump,computing,flume,Nifi
#######################################################
# Twitter configuring  HDFS sink
########################################################
TwitterAgent.sinks.HDFS.hdfs.useLocalTimeStamp = true
TwitterAgent.sinks.HDFS.channel = MemChannel
TwitterAgent.sinks.HDFS.type = hdfs
TwitterAgent.sinks.HDFS.hdfs.path = hdfs://hadoop1:8020/user/flume/tweets
TwitterAgent.sinks.HDFS.hdfs.fileType = DataStream
TwitterAgent.sinks.HDFS.hdfs.WriteFormat = Text
TwitterAgent.sinks.HDFS.hdfs.batchSize = 1000
TwitterAgent.sinks.HDFS.hdfs.rollSize = 0
TwitterAgent.sinks.HDFS.hdfs.rollCount = 10000
#######################################################
# Twitter Channel
########################################################
TwitterAgent.channels.MemChannel.type = memory
TwitterAgent.channels.MemChannel.capacity = 20000
#TwitterAgent.channels.MemChannel.DataDirs =
TwitterAgent.channels.MemChannel.transactionCapacity =1000
#######################################################
# Binding the Source and the Sink to the Channel
########################################################
TwitterAgent.sources.Twitter.channels = MemChannel
TwitterAgent.sinks.HDFS.channels = MemChannel
[flume@hadoop1 conf]$

&lt;/PRE&gt;</description>
    <pubDate>Thu, 27 Oct 2016 22:54:03 GMT</pubDate>
    <dc:creator>aliyesami</dc:creator>
    <dc:date>2016-10-27T22:54:03Z</dc:date>
    <item>
      <title>which file its producing   , JSON or AVRO ?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/which-file-its-producing-JSON-or-AVRO/m-p/153113#M44686</link>
      <description>&lt;P&gt;With the commands below, what type of file is being produced: JSON or AVRO?&lt;/P&gt;&lt;PRE&gt;flume-ng agent --conf ./conf/ -f conf/twitter-to-hdfs.properties --name TwitterAgent  -Dflume.root.logger=WARN,console -Dtwitter4j.http.proxyHost=proxy.server.com -Dtwitter4j.http.proxyPort=8080
[flume@hadoop1 conf]$ pwd
/home/flume/conf
[flume@hadoop1 conf]$
[flume@hadoop1 conf]$ more twitter-to-hdfs.properties
########################################################
# Twitter agent for collecting Twitter data to HDFS.
########################################################
TwitterAgent.sources = Twitter
TwitterAgent.channels = MemChannel
TwitterAgent.sinks = HDFS
########################################################
# Describing and configuring the sources
########################################################
TwitterAgent.sources.Twitter.type = org.apache.flume.source.twitter.TwitterSource
TwitterAgent.sources.Twitter.Channels = MemChannel
TwitterAgent.sources.Twitter.consumerKey = xxxxxxxx
TwitterAgent.sources.Twitter.consumerSecret =xxxxxxxxxxxxxxxxxx
TwitterAgent.sources.Twitter.accessToken = xxxxxxxxxxxxxxxxxx
TwitterAgent.sources.Twitter.accessTokenSecret = xxxxxxxxxxxxxxxxxxxxxxx
TwitterAgent.sources.Twitter.Keywords = hadoop,Data Scientist,BigData,Trump,computing,flume,Nifi
#######################################################
# Twitter configuring  HDFS sink
########################################################
TwitterAgent.sinks.HDFS.hdfs.useLocalTimeStamp = true
TwitterAgent.sinks.HDFS.channel = MemChannel
TwitterAgent.sinks.HDFS.type = hdfs
TwitterAgent.sinks.HDFS.hdfs.path = hdfs://hadoop1:8020/user/flume/tweets
TwitterAgent.sinks.HDFS.hdfs.fileType = DataStream
TwitterAgent.sinks.HDFS.hdfs.WriteFormat = Text
TwitterAgent.sinks.HDFS.hdfs.batchSize = 1000
TwitterAgent.sinks.HDFS.hdfs.rollSize = 0
TwitterAgent.sinks.HDFS.hdfs.rollCount = 10000
#######################################################
# Twitter Channel
########################################################
TwitterAgent.channels.MemChannel.type = memory
TwitterAgent.channels.MemChannel.capacity = 20000
#TwitterAgent.channels.MemChannel.DataDirs =
TwitterAgent.channels.MemChannel.transactionCapacity =1000
#######################################################
# Binding the Source and the Sink to the Channel
########################################################
TwitterAgent.sources.Twitter.channels = MemChannel
TwitterAgent.sinks.HDFS.channels = MemChannel
[flume@hadoop1 conf]$

&lt;/PRE&gt;</description>
      <pubDate>Thu, 27 Oct 2016 22:54:03 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/which-file-its-producing-JSON-or-AVRO/m-p/153113#M44686</guid>
      <dc:creator>aliyesami</dc:creator>
      <dc:date>2016-10-27T22:54:03Z</dc:date>
    </item>
    <item>
      <title>Re: which file its producing   , JSON or AVRO ?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/which-file-its-producing-JSON-or-AVRO/m-p/153114#M44687</link>
      <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/10115/sahmad43.html" nodeid="10115"&gt;@Sami Ahmad&lt;/A&gt; &lt;/P&gt;&lt;P&gt;Flume is simply moving your data from source to target, in this case from Twitter to HDFS. I believe Twitter sends JSON records, which means the file being written is in JSON format. Flume is not altering your file format; it is only moving data.&lt;/P&gt;</description>
      <pubDate>Thu, 27 Oct 2016 23:28:50 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/which-file-its-producing-JSON-or-AVRO/m-p/153114#M44687</guid>
      <dc:creator>mqureshi</dc:creator>
      <dc:date>2016-10-27T23:28:50Z</dc:date>
    </item>
    <item>
      <title>Re: which file its producing   , JSON or AVRO ?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/which-file-its-producing-JSON-or-AVRO/m-p/153115#M44688</link>
      <description>&lt;P&gt;&lt;A href="https://community.cloudera.com/legacyfs/online/attachments/8943-flumedata.zip"&gt;flumedata.zip&lt;/A&gt; If that is the case, then it's not matching the JSON format. Please see the attached file.&lt;/P&gt;</description>
      <pubDate>Fri, 28 Oct 2016 00:08:59 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/which-file-its-producing-JSON-or-AVRO/m-p/153115#M44688</guid>
      <dc:creator>aliyesami</dc:creator>
      <dc:date>2016-10-28T00:08:59Z</dc:date>
    </item>
    <item>
      <title>Re: which file its producing   , JSON or AVRO ?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/which-file-its-producing-JSON-or-AVRO/m-p/153116#M44689</link>
      <description>&lt;P&gt;Your output is AVRO.&lt;/P&gt;&lt;P&gt;I looked at your ZIP and that's an AVRO file.&lt;/P&gt;&lt;P&gt;Flume outputs AVRO from twitter &lt;/P&gt;&lt;P&gt;&lt;A href="https://www.tutorialspoint.com/apache_flume/fetching_twitter_data.htm" target="_blank"&gt;https://www.tutorialspoint.com/apache_flume/fetching_twitter_data.htm&lt;/A&gt;&lt;/P&gt;&lt;P&gt;You can also ingest Twitter to HDFS via Apache NiFi&lt;/P&gt;&lt;P&gt;&lt;A href="http://hortonworks.com/blog/hdf-2-0-flow-processing-real-time-tweets-strata-hadoop-slack-tensorflow-phoenix-zeppelin/" target="_blank"&gt;http://hortonworks.com/blog/hdf-2-0-flow-processing-real-time-tweets-strata-hadoop-slack-tensorflow-phoenix-zeppelin/&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 28 Oct 2016 00:20:58 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/which-file-its-producing-JSON-or-AVRO/m-p/153116#M44689</guid>
      <dc:creator>TimothySpann</dc:creator>
      <dc:date>2016-10-28T00:20:58Z</dc:date>
    </item>
    <item>
      <title>Re: which file its producing   , JSON or AVRO ?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/which-file-its-producing-JSON-or-AVRO/m-p/153117#M44690</link>
      <description>&lt;P&gt;it's avro format&lt;/P&gt;</description>
      <pubDate>Fri, 28 Oct 2016 00:21:29 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/which-file-its-producing-JSON-or-AVRO/m-p/153117#M44690</guid>
      <dc:creator>TimothySpann</dc:creator>
      <dc:date>2016-10-28T00:21:29Z</dc:date>
    </item>
    <item>
      <title>Re: which file its producing   , JSON or AVRO ?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/which-file-its-producing-JSON-or-AVRO/m-p/153118#M44691</link>
      <description>&lt;P&gt;As you can see, I can't read it using JSON:&lt;/P&gt;&lt;PRE&gt;[hdfs@hadoop1 ~]$ more a.py
#!/usr/bin python
import json
with open('FlumeData.1477426267073') as f:
        data = f.read()
        jsondata = json.loads(data)
print jsondata
[hdfs@hadoop1 ~]$ python a.py
Traceback (most recent call last):
  File "a.py", line 7, in &amp;lt;module&amp;gt;
    jsondata = json.loads(data)
  File "/usr/lib64/python2.6/json/__init__.py", line 307, in loads
    return _default_decoder.decode(s)
  File "/usr/lib64/python2.6/json/decoder.py", line 319, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/usr/lib64/python2.6/json/decoder.py", line 338, in raw_decode
    raise ValueError("No JSON object could be decoded")
ValueError: No JSON object could be decoded

&lt;/PRE&gt;</description>
      <pubDate>Fri, 28 Oct 2016 00:22:16 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/which-file-its-producing-JSON-or-AVRO/m-p/153118#M44691</guid>
      <dc:creator>aliyesami</dc:creator>
      <dc:date>2016-10-28T00:22:16Z</dc:date>
    </item>
  </channel>
</rss>

