<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Unable to read json file using elephant-bird,please help in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Unable-to-read-json-file-using-elephant-bird-please-help/m-p/140747#M39980</link>
    <description>&lt;P&gt;Trying to load the json file which is having null values in it by using &lt;STRONG&gt;elephant-bird JsonLoader&lt;/STRONG&gt;.&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;sample.json&lt;/STRONG&gt;&lt;/P&gt;&lt;PRE&gt;{"created_at":"Mon Aug 22 10:48:23 +0000 2016","id":767674772662607873,"id_str":"767674772662607873","text":"KPIT Image Result for https:\/\/t.co\/Nas2ZnF1zZ... https:\/\/t.co\/9TnelwtIvm","source":"\u003ca href=\"http:\/\/twitter.com\" rel=\"nofollow\"\u003eTwitter Web Client\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":123,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[],"urls":[{"url":"https:\/\/t.co\/Nas2ZnF1zZ","expanded_url":"http:\/\/miltonious.com\/","display_url":"miltonious.com","indices":[24,47]}],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","timestamp_ms":"1471862903167"}&lt;/PRE&gt;&lt;P&gt;&lt;STRONG&gt;script:&lt;/STRONG&gt;&lt;/P&gt;&lt;PRE&gt;REGISTER piggybank.jar
REGISTER json-simple-1.1.1.jar
REGISTER elephant-bird-pig-4.3.jar
REGISTER elephant-bird-core-4.1.jar
REGISTER elephant-bird-hadoop-compat-4.3.jar

json = LOAD 'sample.json' USING JsonLoader('created_at:chararray, id:chararray, id_str:chararray, text:chararray, source:chararray, in_reply_to_status_id:chararray, in_reply_to_status_id_str:chararray, in_reply_to_user_id:chararray, in_reply_to_user_id_str:chararray, in_reply_to_screen_name:chararray, geo:chararray, coordinates:chararray, place:chararray, contributors:chararray, is_quote_status:bytearray, retweet_count:long, favorite_count:chararray, entities:map[], favorited:bytearray, retweeted:bytearray, possibly_sensitive:bytearray, lang:chararray');
describe json; dump json;&lt;/PRE&gt;&lt;P&gt;When I dump json, I am getting the following &lt;STRONG&gt;output&lt;/STRONG&gt; and the &lt;STRONG&gt;warning&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;(Mon Aug 22 10:48:23 +0000 2016,767674772662607873,767674772662607873,google Image Result for Twitter Web Client,false,1234,12345,3214,43215,,,,,,,,,,,,,,)&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;WARN org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigHadoopLogger - org.apache.pig.builtin.JsonLoader(UDF_WARNING_1): Bad record, returning null for {complete json}&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;By warning I guess it is getting NULL values. So how can we load a JSON which is having null values in it?&lt;/P&gt;&lt;P&gt;And I have tried in another way i.e.&lt;/P&gt;&lt;PRE&gt;json = LOAD 'sample.json' USING com.twitter.elephantbird.pig.load.JsonLoader('created_at:chararray, id:chararray, id_str:chararray, text:chararray, source:chararray, in_reply_to_status_id:chararray, in_reply_to_status_id_str:chararray, in_reply_to_user_id:chararray, in_reply_to_user_id_str:chararray, in_reply_to_screen_name:chararray, geo:chararray, coordinates:chararray, place:chararray, contributors:chararray, is_quote_status:bytearray, retweet_count:long, favorite_count:chararray, entities:map[], favorited:bytearray, retweeted:bytearray, possibly_sensitive:bytearray, lang:chararray');

describe json;&lt;/PRE&gt;&lt;P&gt;&lt;STRONG&gt;Output&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;&lt;/STRONG&gt;Schema for json unknown.&lt;/P&gt;&lt;P&gt;Please suggest me.&lt;/P&gt;&lt;P&gt;Thanks.&lt;/P&gt;</description>
    <pubDate>Wed, 07 Sep 2016 19:09:50 GMT</pubDate>
    <dc:creator>mohan221213</dc:creator>
    <dc:date>2016-09-07T19:09:50Z</dc:date>
    <item>
      <title>Unable to read json file using elephant-bird,please help</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Unable-to-read-json-file-using-elephant-bird-please-help/m-p/140747#M39980</link>
      <description>&lt;P&gt;Trying to load the json file which is having null values in it by using &lt;STRONG&gt;elephant-bird JsonLoader&lt;/STRONG&gt;.&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;sample.json&lt;/STRONG&gt;&lt;/P&gt;&lt;PRE&gt;{"created_at":"Mon Aug 22 10:48:23 +0000 2016","id":767674772662607873,"id_str":"767674772662607873","text":"KPIT Image Result for https:\/\/t.co\/Nas2ZnF1zZ... https:\/\/t.co\/9TnelwtIvm","source":"\u003ca href=\"http:\/\/twitter.com\" rel=\"nofollow\"\u003eTwitter Web Client\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":123,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[],"urls":[{"url":"https:\/\/t.co\/Nas2ZnF1zZ","expanded_url":"http:\/\/miltonious.com\/","display_url":"miltonious.com","indices":[24,47]}],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","timestamp_ms":"1471862903167"}&lt;/PRE&gt;&lt;P&gt;&lt;STRONG&gt;script:&lt;/STRONG&gt;&lt;/P&gt;&lt;PRE&gt;REGISTER piggybank.jar
REGISTER json-simple-1.1.1.jar
REGISTER elephant-bird-pig-4.3.jar
REGISTER elephant-bird-core-4.1.jar
REGISTER elephant-bird-hadoop-compat-4.3.jar

json = LOAD 'sample.json' USING JsonLoader('created_at:chararray, id:chararray, id_str:chararray, text:chararray, source:chararray, in_reply_to_status_id:chararray, in_reply_to_status_id_str:chararray, in_reply_to_user_id:chararray, in_reply_to_user_id_str:chararray, in_reply_to_screen_name:chararray, geo:chararray, coordinates:chararray, place:chararray, contributors:chararray, is_quote_status:bytearray, retweet_count:long, favorite_count:chararray, entities:map[], favorited:bytearray, retweeted:bytearray, possibly_sensitive:bytearray, lang:chararray');
describe json; dump json;&lt;/PRE&gt;&lt;P&gt;When I dump json, I am getting the following &lt;STRONG&gt;output&lt;/STRONG&gt; and the &lt;STRONG&gt;warning&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;(Mon Aug 22 10:48:23 +0000 2016,767674772662607873,767674772662607873,google Image Result for Twitter Web Client,false,1234,12345,3214,43215,,,,,,,,,,,,,,)&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;WARN org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigHadoopLogger - org.apache.pig.builtin.JsonLoader(UDF_WARNING_1): Bad record, returning null for {complete json}&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;By warning I guess it is getting NULL values. So how can we load a JSON which is having null values in it?&lt;/P&gt;&lt;P&gt;And I have tried in another way i.e.&lt;/P&gt;&lt;PRE&gt;json = LOAD 'sample.json' USING com.twitter.elephantbird.pig.load.JsonLoader('created_at:chararray, id:chararray, id_str:chararray, text:chararray, source:chararray, in_reply_to_status_id:chararray, in_reply_to_status_id_str:chararray, in_reply_to_user_id:chararray, in_reply_to_user_id_str:chararray, in_reply_to_screen_name:chararray, geo:chararray, coordinates:chararray, place:chararray, contributors:chararray, is_quote_status:bytearray, retweet_count:long, favorite_count:chararray, entities:map[], favorited:bytearray, retweeted:bytearray, possibly_sensitive:bytearray, lang:chararray');

describe json;&lt;/PRE&gt;&lt;P&gt;&lt;STRONG&gt;Output&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;&lt;/STRONG&gt;Schema for json unknown.&lt;/P&gt;&lt;P&gt;Please suggest me.&lt;/P&gt;&lt;P&gt;Thanks.&lt;/P&gt;</description>
      <pubDate>Wed, 07 Sep 2016 19:09:50 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Unable-to-read-json-file-using-elephant-bird-please-help/m-p/140747#M39980</guid>
      <dc:creator>mohan221213</dc:creator>
      <dc:date>2016-09-07T19:09:50Z</dc:date>
    </item>
    <item>
      <title>Re: Unable to read json file using elephant-bird,please help</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Unable-to-read-json-file-using-elephant-bird-please-help/m-p/140748#M39981</link>
      <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/10889/mohan221213.html" nodeid="10889"&gt;@Mohan V&lt;/A&gt; took your sample, ran this on HDP 2.5 Sandbox so using Pig 0.16 rather than 0.15 but otherwise everything else is the same. I also renamed alias json to data.&lt;/P&gt;&lt;PRE&gt;[guest@sandbox ~]$ cat sample.json
{"created_at":"Mon Aug 22 10:48:23 +0000 2016","id":767674772662607873,"id_str":"767674772662607873","text":"KPIT Image Result for https:\/\/t.co\/Nas2ZnF1zZ... https:\/\/t.co\/9TnelwtIvm","source":"\u003ca href=\"http:\/\/twitter.com\" rel=\"nofollow\"\u003eTwitter Web Client\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":123,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[],"urls":[{"url":"https:\/\/t.co\/Nas2ZnF1zZ","expanded_url":"http:\/\/miltonious.com\/","display_url":"miltonious.com","indices":[24,47]}],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","timestamp_ms":"1471862903167"}
&lt;/PRE&gt;&lt;PRE&gt;[guest@sandbox ~]$ hdfs dfs -put sample.json .&lt;/PRE&gt;&lt;P&gt;used the jars provided by &lt;A rel="user" href="https://community.cloudera.com/users/11288/gkeys.html" nodeid="11288"&gt;@gkeys&lt;/A&gt; in your other question, the only difference is you're mixing 4.3 with 4.1 versions, perhaps that's the issue&lt;/P&gt;&lt;PRE&gt;REGISTER elephant-bird-core-4.1.jar
REGISTER elephant-bird-pig-4.1.jar
REGISTER elephant-bird-hadoop-compat-4.1.jar
REGISTER json-simple-1.1.1.jar

data = LOAD 'sample.json' USING JsonLoader('created_at:chararray, id:chararray, id_str:chararray, text:chararray, source:chararray, in_reply_to_status_id:chararray, in_reply_to_status_id_str:chararray, in_reply_to_user_id:chararray, in_reply_to_user_id_str:chararray, in_reply_to_screen_name:chararray, geo:chararray, coordinates:chararray, place:chararray, contributors:chararray, is_quote_status:bytearray, retweet_count:long, favorite_count:chararray, entities:map[], favorited:bytearray, retweeted:bytearray, possibly_sensitive:bytearray, lang:chararray');

describe data;
dump data;&lt;/PRE&gt;&lt;P&gt;executing with tez&lt;/P&gt;&lt;PRE&gt;[guest@sandbox ~]$ pig -x tez mohan.pig&lt;/PRE&gt;&lt;PRE&gt;WARNING: Use "yarn jar" to launch YARN applications.
16/09/11 22:16:12 INFO pig.ExecTypeProvider: Trying ExecType : LOCAL
16/09/11 22:16:12 INFO pig.ExecTypeProvider: Trying ExecType : MAPREDUCE
16/09/11 22:16:12 INFO pig.ExecTypeProvider: Trying ExecType : TEZ_LOCAL
16/09/11 22:16:12 INFO pig.ExecTypeProvider: Trying ExecType : TEZ
16/09/11 22:16:12 INFO pig.ExecTypeProvider: Picked TEZ as the ExecType
2016-09-11 22:16:13,002 [main] INFO  org.apache.pig.Main - Apache Pig version 0.16.0.2.5.0.0-817 (rUnversioned directory) compiled Jun 26 2016, 11:34:45
2016-09-11 22:16:13,003 [main] INFO  org.apache.pig.Main - Logging error messages to: /home/guest/pig_1473632173001.log
2016-09-11 22:16:13,813 [main] INFO  org.apache.pig.impl.util.Utils - Default bootup file /home/guest/.pigbootup not found
2016-09-11 22:16:13,940 [main] INFO  org.apache.pig.backend.hadoop.executionengine.HExecutionEngine - Connecting to hadoop file system at: hdfs://sandbox.hortonworks.com:8020
2016-09-11 22:16:14,389 [main] INFO  org.apache.pig.PigServer - Pig Script ID for the session: PIG-mohan.pig-6b630799-b287-476a-ac2f-ea19ee7d25ae
2016-09-11 22:16:14,761 [main] INFO  org.apache.hadoop.yarn.client.api.impl.TimelineClientImpl - Timeline service address: &lt;A href="http://sandbox.hortonworks.com:8188/ws/v1/timeline/" target="_blank"&gt;http://sandbox.hortonworks.com:8188/ws/v1/timeline/&lt;/A&gt;
2016-09-11 22:16:14,862 [main] INFO  org.apache.pig.backend.hadoop.PigATSClient - Created ATS Hook
2016-09-11 22:16:15,238 [main] INFO  org.apache.pig.impl.util.SpillableMemoryManager - Selected heap (PS Old Gen) of size 698875904 to monitor. collectionUsageThreshold = 489213120, usageThreshold = 489213120
data: {created_at: chararray,id: chararray,id_str: chararray,text: chararray,source: chararray,in_reply_to_status_id: chararray,in_reply_to_status_id_str: chararray,in_reply_to_user_id: chararray,in_reply_to_user_id_str: chararray,in_reply_to_screen_name: chararray,geo: chararray,coordinates: chararray,place: chararray,contributors: chararray,is_quote_status: bytearray,retweet_count: long,favorite_count: chararray,entities: map[],favorited: bytearray,retweeted: bytearray,possibly_sensitive: bytearray,lang: chararray}
2016-09-11 22:16:15,463 [main] INFO  org.apache.pig.tools.pigstats.ScriptState - Pig features used in the script: UNKNOWN
2016-09-11 22:16:15,511 [main] INFO  org.apache.pig.data.SchemaTupleBackend - Key [pig.schematuple] was not set... will not generate code.
2016-09-11 22:16:15,572 [main] INFO  org.apache.pig.newplan.logical.optimizer.LogicalPlanOptimizer - {RULES_ENABLED=[AddForEach, ColumnMapKeyPrune, ConstantCalculator, GroupByConstParallelSetter, LimitOptimizer, LoadTypeCastInserter, MergeFilter, MergeForEach, PartitionFilterOptimizer, PredicatePushdownOptimizer, PushDownForEachFlatten, PushUpFilter, SplitFilter, StreamTypeCastInserter]}
2016-09-11 22:16:15,696 [main] INFO  org.apache.pig.backend.hadoop.executionengine.tez.TezLauncher - Tez staging directory is /tmp/guest/staging and resources directory is /tmp/temp1880338694
2016-09-11 22:16:15,743 [main] INFO  org.apache.pig.backend.hadoop.executionengine.tez.plan.TezCompiler - File concatenation threshold: 100 optimistic? false
2016-09-11 22:16:15,843 [main] INFO  org.apache.hadoop.mapreduce.lib.input.FileInputFormat - Total input paths to process : 1
2016-09-11 22:16:15,885 [main] INFO  com.hadoop.compression.lzo.GPLNativeCodeLoader - Loaded native gpl library
2016-09-11 22:16:15,889 [main] INFO  com.hadoop.compression.lzo.LzoCodec - Successfully loaded &amp;amp; initialized native-lzo library [hadoop-lzo rev 7a4b57bedce694048432dd5bf5b90a6c8ccdba80]
2016-09-11 22:16:15,895 [main] INFO  org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil - Total input paths (combined) to process : 1
2016-09-11 22:16:16,331 [main] INFO  org.apache.pig.backend.hadoop.executionengine.tez.TezJobCompiler - Local resource: elephant-bird-core-4.1.jar
2016-09-11 22:16:16,331 [main] INFO  org.apache.pig.backend.hadoop.executionengine.tez.TezJobCompiler - Local resource: jackson-core-asl-1.9.13.jar
2016-09-11 22:16:16,331 [main] INFO  org.apache.pig.backend.hadoop.executionengine.tez.TezJobCompiler - Local resource: pig-0.16.0.2.5.0.0-817-core-h2.jar
2016-09-11 22:16:16,331 [main] INFO  org.apache.pig.backend.hadoop.executionengine.tez.TezJobCompiler - Local resource: elephant-bird-pig-4.1.jar
2016-09-11 22:16:16,331 [main] INFO  org.apache.pig.backend.hadoop.executionengine.tez.TezJobCompiler - Local resource: antlr-runtime-3.4.jar
2016-09-11 22:16:16,331 [main] INFO  org.apache.pig.backend.hadoop.executionengine.tez.TezJobCompiler - Local resource: automaton-1.11-8.jar
2016-09-11 22:16:16,331 [main] INFO  org.apache.pig.backend.hadoop.executionengine.tez.TezJobCompiler - Local resource: elephant-bird-hadoop-compat-4.1.jar
2016-09-11 22:16:16,331 [main] INFO  org.apache.pig.backend.hadoop.executionengine.tez.TezJobCompiler - Local resource: json-simple-1.1.1.jar
2016-09-11 22:16:16,331 [main] INFO  org.apache.pig.backend.hadoop.executionengine.tez.TezJobCompiler - Local resource: joda-time-2.8.1.jar
2016-09-11 22:16:16,513 [main] INFO  org.apache.pig.backend.hadoop.executionengine.tez.TezDagBuilder - For vertex - scope-2: parallelism=1, memory=256, java opts=-Xmx256m
2016-09-11 22:16:16,513 [main] INFO  org.apache.pig.backend.hadoop.executionengine.tez.TezDagBuilder - Processing aliases: data
2016-09-11 22:16:16,513 [main] INFO  org.apache.pig.backend.hadoop.executionengine.tez.TezDagBuilder - Detailed locations: data[6,7]
2016-09-11 22:16:16,513 [main] INFO  org.apache.pig.backend.hadoop.executionengine.tez.TezDagBuilder - Pig features in the vertex:
2016-09-11 22:16:16,597 [main] INFO  org.apache.pig.backend.hadoop.executionengine.tez.TezJobCompiler - Total estimated parallelism is 1
2016-09-11 22:16:16,688 [PigTezLauncher-0] INFO  org.apache.pig.tools.pigstats.tez.TezScriptState - Pig script settings are added to the job
2016-09-11 22:16:16,718 [PigTezLauncher-0] INFO  org.apache.tez.client.TezClient - Tez Client Version: [ component=tez-api, version=0.7.0.2.5.0.0-817, revision=85dd709e66a077055a1749469af62f4d1f3818ed, SCM-URL=scm:git:https://git-wip-us.apache.org/repos/asf/tez.git, buildTime=20160623-1449 ]
2016-09-11 22:16:16,907 [PigTezLauncher-0] INFO  org.apache.hadoop.yarn.client.api.impl.TimelineClientImpl - Timeline service address: &lt;A href="http://sandbox.hortonworks.com:8188/ws/v1/timeline/" target="_blank"&gt;http://sandbox.hortonworks.com:8188/ws/v1/timeline/&lt;/A&gt;
2016-09-11 22:16:16,913 [PigTezLauncher-0] INFO  org.apache.hadoop.yarn.client.RMProxy - Connecting to ResourceManager at sandbox.hortonworks.com/10.0.2.15:8050
2016-09-11 22:16:17,017 [PigTezLauncher-0] INFO  org.apache.tez.client.TezClient - Using org.apache.tez.dag.history.ats.acls.ATSV15HistoryACLPolicyManager to manage Timeline ACLs
2016-09-11 22:16:17,111 [PigTezLauncher-0] INFO  org.apache.hadoop.yarn.client.api.impl.TimelineClientImpl - Timeline service address: &lt;A href="http://sandbox.hortonworks.com:8188/ws/v1/timeline/" target="_blank"&gt;http://sandbox.hortonworks.com:8188/ws/v1/timeline/&lt;/A&gt;
2016-09-11 22:16:17,117 [PigTezLauncher-0] INFO  org.apache.tez.client.TezClient - Session mode. Starting session.
2016-09-11 22:16:17,120 [PigTezLauncher-0] INFO  org.apache.tez.client.TezClientUtils - Using tez.lib.uris value from configuration: /hdp/apps/2.5.0.0-817/tez/tez.tar.gz
2016-09-11 22:16:17,180 [PigTezLauncher-0] INFO  org.apache.tez.client.TezClient - Tez system stage directory hdfs://sandbox.hortonworks.com:8020/tmp/guest/staging/.tez/application_1473630550492_0001 doesn't exist and is created
2016-09-11 22:16:17,212 [PigTezLauncher-0] INFO  org.apache.tez.dag.history.ats.acls.ATSV15HistoryACLPolicyManager - Created Timeline Domain for History ACLs, domainId=Tez_ATS_application_1473630550492_0001
2016-09-11 22:16:17,580 [PigTezLauncher-0] INFO  org.apache.hadoop.yarn.client.api.impl.YarnClientImpl - Submitted application application_1473630550492_0001
2016-09-11 22:16:17,583 [PigTezLauncher-0] INFO  org.apache.tez.client.TezClient - The url to track the Tez Session: &lt;A href="http://sandbox.hortonworks.com:8088/proxy/application_1473630550492_0001/" target="_blank"&gt;http://sandbox.hortonworks.com:8088/proxy/application_1473630550492_0001/&lt;/A&gt;
2016-09-11 22:16:23,232 [PigTezLauncher-0] INFO  org.apache.pig.backend.hadoop.executionengine.tez.TezJob - Submitting DAG PigLatin:mohan.pig-0_scope-0
2016-09-11 22:16:23,232 [PigTezLauncher-0] INFO  org.apache.tez.client.TezClient - Submitting dag to TezSession, sessionName=PigLatin:mohan.pig, applicationId=application_1473630550492_0001, dagName=PigLatin:mohan.pig-0_scope-0, callerContext={ context=PIG, callerType=PIG_SCRIPT_ID, callerId=PIG-mohan.pig-6b630799-b287-476a-ac2f-ea19ee7d25ae }
2016-09-11 22:16:23,600 [PigTezLauncher-0] INFO  org.apache.tez.client.TezClient - Submitted dag to TezSession, sessionName=PigLatin:mohan.pig, applicationId=application_1473630550492_0001, dagName=PigLatin:mohan.pig-0_scope-0
2016-09-11 22:16:23,793 [PigTezLauncher-0] INFO  org.apache.hadoop.yarn.client.api.impl.TimelineClientImpl - Timeline service address: &lt;A href="http://sandbox.hortonworks.com:8188/ws/v1/timeline/" target="_blank"&gt;http://sandbox.hortonworks.com:8188/ws/v1/timeline/&lt;/A&gt;
2016-09-11 22:16:23,793 [PigTezLauncher-0] INFO  org.apache.hadoop.yarn.client.RMProxy - Connecting to ResourceManager at sandbox.hortonworks.com/10.0.2.15:8050
2016-09-11 22:16:23,801 [PigTezLauncher-0] INFO  org.apache.pig.backend.hadoop.executionengine.tez.TezJob - Submitted DAG PigLatin:mohan.pig-0_scope-0. Application id: application_1473630550492_0001
2016-09-11 22:16:24,640 [main] INFO  org.apache.pig.backend.hadoop.executionengine.tez.TezLauncher - HadoopJobId: job_1473630550492_0001
2016-09-11 22:16:24,802 [Timer-0] INFO  org.apache.pig.backend.hadoop.executionengine.tez.TezJob - DAG Status: status=RUNNING, progress=TotalTasks: 1 Succeeded: 0 Running: 0 Failed: 0 Killed: 0, diagnostics=, counters=null
2016-09-11 22:16:28,939 [PigTezLauncher-0] INFO  org.apache.tez.common.counters.Limits - Counter limits initialized with parameters:  GROUP_NAME_MAX=256, MAX_GROUPS=3000, COUNTER_NAME_MAX=64, MAX_COUNTERS=10000
2016-09-11 22:16:28,944 [PigTezLauncher-0] INFO  org.apache.pig.backend.hadoop.executionengine.tez.TezJob - DAG Status: status=SUCCEEDED, progress=TotalTasks: 1 Succeeded: 1 Running: 0 Failed: 0 Killed: 0, diagnostics=, counters=Counters: 21
	org.apache.tez.common.counters.DAGCounter
		NUM_SUCCEEDED_TASKS=1
		TOTAL_LAUNCHED_TASKS=1
		DATA_LOCAL_TASKS=1
		AM_CPU_MILLISECONDS=910
		AM_GC_TIME_MILLIS=11
	File System Counters
		HDFS_BYTES_READ=911
		HDFS_BYTES_WRITTEN=253
		HDFS_READ_OPS=4
		HDFS_LARGE_READ_OPS=0
		HDFS_WRITE_OPS=2
	org.apache.tez.common.counters.TaskCounter
		GC_TIME_MILLIS=64
		CPU_MILLISECONDS=2690
		PHYSICAL_MEMORY_BYTES=170393600
		VIRTUAL_MEMORY_BYTES=990400512
		COMMITTED_HEAP_BYTES=170393600
		INPUT_RECORDS_PROCESSED=1
		OUTPUT_RECORDS=1
	TaskCounter_scope_2_INPUT_scope_0
		INPUT_RECORDS_PROCESSED=1
	TaskCounter_scope_2_OUTPUT_scope_1
		OUTPUT_RECORDS=1
	org.apache.pig.PigWarning
		UDF_WARNING_1=1
	org.apache.pig.builtin.JsonLoader
		UDF_WARNING_1=1
2016-09-11 22:16:29,650 [main] WARN  org.apache.pig.backend.hadoop.executionengine.tez.TezLauncher - Encountered Warning UDF_WARNING_1 1 time(s).
2016-09-11 22:16:29,655 [main] INFO  org.apache.pig.tools.pigstats.tez.TezPigScriptStats - Script Statistics:


       HadoopVersion: 2.7.1.2.5.0.0-817
          PigVersion: 0.16.0.2.5.0.0-817
          TezVersion: 0.7.0.2.5.0.0-817
              UserId: guest
            FileName: mohan.pig
           StartedAt: 2016-09-11 22:16:15
          FinishedAt: 2016-09-11 22:16:29
            Features: UNKNOWN


Success!




DAG 0:
                                    Name: PigLatin:mohan.pig-0_scope-0
                           ApplicationId: job_1473630550492_0001
                      TotalLaunchedTasks: 1
                           FileBytesRead: 0
                        FileBytesWritten: 0
                           HdfsBytesRead: 911
                        HdfsBytesWritten: 253
      SpillableMemoryManager spill count: 0
                Bags proactively spilled: 0
             Records proactively spilled: 0


DAG Plan:
Tez vertex scope-2


Vertex Stats:
VertexId Parallelism TotalTasks   InputRecords   ReduceInputRecords  OutputRecords  FileBytesRead FileBytesWritten  HdfsBytesRead HdfsBytesWritten Alias	Feature	Outputs
scope-2            1          1              1                    0              1              0                0            911              253 data		hdfs://sandbox.hortonworks.com:8020/tmp/temp1943556042/tmp-1378487105,


Input(s):
Successfully read 1 records (911 bytes) from: "hdfs://sandbox.hortonworks.com:8020/user/guest/sample.json"


Output(s):
Successfully stored 1 records (253 bytes) in: "hdfs://sandbox.hortonworks.com:8020/tmp/temp1943556042/tmp-1378487105"


2016-09-11 22:16:29,659 [main] WARN  org.apache.pig.data.SchemaTupleBackend - SchemaTupleBackend has already been initialized
2016-09-11 22:16:29,673 [main] INFO  org.apache.hadoop.mapreduce.lib.input.FileInputFormat - Total input paths to process : 1
2016-09-11 22:16:29,673 [main] INFO  org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil - Total input paths to process : 1
(Mon Aug 22 10:48:23 +0000 2016,767674772662607873,767674772662607873,KPIT Image Result for &lt;A href="https://t.co/Nas2ZnF1zZ" target="_blank"&gt;https://t.co/Nas2ZnF1zZ&lt;/A&gt;. &lt;A href="https://t.co/9TnelwtIvm" target="_blank"&gt;https://t.co/9TnelwtIvm&lt;/A&gt;,&amp;lt;a href="http://twitter.com" rel="nofollow"&amp;gt;Twitter Web Client&amp;lt;/a&amp;gt;,false,123,,,,,,,,,,,,,,,)
2016-09-11 22:16:29,782 [main] INFO  org.apache.pig.Main - Pig script completed in 16 seconds and 908 milliseconds (16908 ms)
2016-09-11 22:16:29,789 [main] INFO  org.apache.pig.backend.hadoop.executionengine.tez.TezLauncher - Shutting down thread pool
2016-09-11 22:16:29,804 [Thread-34] INFO  org.apache.pig.backend.hadoop.executionengine.tez.TezSessionManager - Shutting down Tez session org.apache.tez.client.TezClient@5df6b6ad
2016-09-11 22:16:30,214 [Thread-34] INFO  org.apache.tez.client.TezClient - Shutting down Tez Session, sessionName=PigLatin:mohan.pig, applicationId=application_1473630550492_0001
&lt;/PRE&gt;</description>
      <pubDate>Mon, 12 Sep 2016 05:24:33 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Unable-to-read-json-file-using-elephant-bird-please-help/m-p/140748#M39981</guid>
      <dc:creator>aervits</dc:creator>
      <dc:date>2016-09-12T05:24:33Z</dc:date>
    </item>
    <item>
      <title>Re: Unable to read json file using elephant-bird,please help</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Unable-to-read-json-file-using-elephant-bird-please-help/m-p/140749#M39982</link>
      <description>&lt;P&gt;Thanks for your reply &lt;A href="https://community.hortonworks.com/users/393/aervits.html"&gt;Artem Ervits&lt;/A&gt;.&lt;/P&gt;&lt;P&gt;I think it is because of the different versions that I have used in my script.&lt;/P&gt;&lt;P&gt;When I used the same versions of elephant bird then it worked fine for me as suggested by @gkeys.&lt;/P&gt;&lt;P&gt;&lt;B&gt;script:-&lt;/B&gt;&lt;/P&gt;&lt;PRE&gt;REGISTER elephant-bird-core-4.1.jar 
REGISTER elephant-bird-hadoop-compat-4.1.jar 
REGISTER elephant-bird-pig-4.1.jar 
REGISTER json-simple-1.1.1.jar

twitter = LOAD 'sample.json' USING com.twitter.elephantbird.pig.load.JsonLoader();

extracted = foreach twitter generate (chararray)$0#'created_at' as created_at,(chararray)$0#'id' as id,(chararray)$0#'id_str' as id_str,(chararray)$0#'text' as text,(chararray)$0#'source' as source,com.twitter.elephantbird.pig.piggybank.JsonStringToMap($0#'entities') as entities,(boolean)$0#'favorited' as favorited,(long)$0#'favorite_count' as favorite_count,(long)$0#'retweet_count' as retweet_count,(boolean)$0#'retweeted' as retweeted,com.twitter.elephantbird.pig.piggybank.JsonStringToMap($0#'place') as place;

dump extracted;

And it worked fine.&lt;/PRE&gt;</description>
      <pubDate>Mon, 12 Sep 2016 20:20:36 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Unable-to-read-json-file-using-elephant-bird-please-help/m-p/140749#M39982</guid>
      <dc:creator>mohan221213</dc:creator>
      <dc:date>2016-09-12T20:20:36Z</dc:date>
    </item>
  </channel>
</rss>

