<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Getting multiple records while loading nested XML data into HIVE in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Getting-multiple-records-while-loading-nested-XML-data-into/m-p/222481#M184351</link>
    <description>&lt;P&gt;Hi &lt;A rel="user" href="https://community.cloudera.com/users/84172/anjiraju33.html" nodeid="84172"&gt;@Anji Raju&lt;/A&gt;! &lt;BR /&gt;Hmm, guess there's a little almost invisible mistake in your xpath &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt; &lt;BR /&gt;Try to change your ReturnPay&lt;STRONG&gt;L&lt;/STRONG&gt;oad&lt;/P&gt;&lt;PRE&gt; "column.xpath.ReturnPayLoad" = "/FormServerResponse/ReturnPaylLoad/ACORD/SignonRq"&lt;/PRE&gt;&lt;P&gt;to ReturnPay&lt;STRONG&gt;l&lt;/STRONG&gt;oad&lt;/P&gt;&lt;PRE&gt;"column.xpath.ReturnPayLoad" = "/FormServerResponse/ReturnPayload/ACORD/SignonRq"&lt;/PRE&gt;&lt;P&gt;Hope this helps!&lt;/P&gt;</description>
    <pubDate>Wed, 20 Jun 2018 10:00:35 GMT</pubDate>
    <dc:creator>vmurakami</dc:creator>
    <dc:date>2018-06-20T10:00:35Z</dc:date>
    <item>
      <title>Getting multiple records while loading nested XML data into HIVE</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Getting-multiple-records-while-loading-nested-XML-data-into/m-p/222478#M184348</link>
      <description>&lt;P&gt;Hi All,&lt;/P&gt;&lt;P&gt;I am trying to load nested XML file into HIVE table, but i see multiple lines getting generated for the values in Array.&lt;BR /&gt;&lt;BR /&gt;XML File :&lt;BR /&gt;&lt;BR /&gt;&amp;lt;?xml version="1.0" encoding="UTF-8"?&amp;gt; 
&amp;lt;FormServerResponse&amp;gt; 
 &amp;lt;Status&amp;gt; 
 &amp;lt;StatusCode&amp;gt;0&amp;lt;/StatusCode&amp;gt; 
 &amp;lt;StatusDesc&amp;gt; Success&amp;lt;/StatusDesc&amp;gt; 
 &amp;lt;ConfidenceIndex&amp;gt; D&amp;lt;/ConfidenceIndex&amp;gt; 
 &amp;lt;Messages&amp;gt; 
 &amp;lt;Message severity="Success"&amp;gt; 
 &amp;lt;StatusCode&amp;gt; 0&amp;lt;/StatusCode&amp;gt; 
 &amp;lt;PageNumber&amp;gt; 1&amp;lt;/PageNumber&amp;gt; 
 &amp;lt;FileName&amp;gt; Filxxxxe.pdf&amp;lt;/FileName&amp;gt; 
 &amp;lt;LayoutFileId&amp;gt; xxxx&amp;lt;/LayoutFileId&amp;gt; 
 &amp;lt;LayoutPageId&amp;gt; xxxxx&amp;lt;/LayoutPageId&amp;gt; 
 &amp;lt;LayoutIdentiferText/&amp;gt; 
 &amp;lt;Text&amp;gt; xx xxxxxx xx&amp;lt;/Text&amp;gt; 
 &amp;lt;/Message&amp;gt; 
 &amp;lt;/Messages&amp;gt; 
 &amp;lt;/Status&amp;gt; 
 &amp;lt;/FormServerResponse&amp;gt; &lt;BR /&gt;&lt;BR /&gt;Command I used to load data into HIVE:&lt;/P&gt;&lt;P&gt;create  table ViewTable(    StatusCode      string,StatusDesc      string,ConfidenceIndex string,Messages       struct&amp;lt;Message: array&amp;lt;struct&amp;lt; severity:string,statuscode:string,pagenumber:string,filename:string,layoutfileid:string, layoutpageid:string, layoutidentifertext:string,text:string&amp;gt;&amp;gt;&amp;gt; ) 
row format serde 'com.ibm.spss.hive.serde2.xml.XmlSerDe'
with serdeproperties 
(
    "column.xpath.StatusCode"       = "/Status/StatusCode/text()",    "column.xpath.StatusDesc"       = "/Status/StatusDesc/text()","column.xpath.ConfidenceIndex"  = "/Status/ConfidenceIndex/text()",    "column.xpath.Messages"         = "/Status/Messages/Message"
)
stored as 
inputformat     'com.ibm.spss.hive.serde2.xml.XmlInputFormat'
outputformat    'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
tblproperties 
(
    "xmlinput.start"    = "&amp;lt;Status&amp;gt;"
   ,"xmlinput.end"      = "&amp;lt;/Status&amp;gt;"
);&lt;/P&gt;&lt;P&gt;Result :&lt;/P&gt;&lt;P&gt;select * from viewtable;
OK
0       Success D       {"message":[{"severity":null,"statuscode":"0","pagenumber":null,"filename":null,"layoutfileid":null,"layoutpageid":null,"layoutidentifertext":null,"text":null},{"severity":null,"statuscode":null,"pagenumber":"1","filename":null,"layoutfileid":null,"layoutpageid":null,"layoutidentifertext":null,"text":null},{"severity":null,"statuscode":null,"pagenumber":null,"filename":"Filxxxxe.pdf","layoutfileid":null,"layoutpageid":null,"layoutidentifertext":null,"text":null},{"severity":null,"statuscode":null,"pagenumber":null,"filename":null,"layoutfileid":"xxxx","layoutpageid":null,"layoutidentifertext":null,"text":null},{"severity":null,"statuscode":null,"pagenumber":null,"filename":null,"layoutfileid":null,"layoutpageid":"xxxxx","layoutidentifertext":null,"text":null},{"severity":null,"statuscode":null,"pagenumber":null,"filename":null,"layoutfileid":null,"layoutpageid":null,"layoutidentifertext":null,"text":null},{"severity":null,"statuscode":null,"pagenumber":null,"filename":null,"layoutfileid":null,"layoutpageid":null,"layoutidentifertext":null,"text":"xx xxxxxx xx"}]}
Time taken: 0.423 seconds, Fetched: 1 row(s)&lt;/P&gt;&lt;P&gt;Result expected:&lt;/P&gt;&lt;P&gt;0 Success D  {"message":[{"severity":success,"statuscode":"0","pagenumber":1,"filename":Filxxxe.pdf,"layoutfileid":xxxx,"layoutpageid":xxxxx,"layoutidentifertext":null,"text":xx xxxxx xx}]}&lt;BR /&gt;&lt;BR /&gt;Could someone help me where I am making mistake?????&lt;/P&gt;</description>
      <pubDate>Thu, 14 Jun 2018 22:41:34 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Getting-multiple-records-while-loading-nested-XML-data-into/m-p/222478#M184348</guid>
      <dc:creator>anjiraju33</dc:creator>
      <dc:date>2018-06-14T22:41:34Z</dc:date>
    </item>
    <item>
      <title>Re: Getting multiple records while loading nested XML data into HIVE</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Getting-multiple-records-while-loading-nested-XML-data-into/m-p/222479#M184349</link>
      <description>&lt;P&gt;Hey &lt;A rel="user" href="https://community.cloudera.com/users/84172/anjiraju33.html" nodeid="84172"&gt;@Anji Raju&lt;/A&gt;!&lt;BR /&gt;I made a test here, and it seems that you need to change your &lt;BR /&gt;&lt;STRONG&gt;&lt;/STRONG&gt;&lt;STRONG&gt;Messages struct&amp;lt;Message: array&amp;lt;struct&amp;lt; &lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&lt;/STRONG&gt;for&lt;BR /&gt;&lt;STRONG&gt;&lt;/STRONG&gt;&lt;STRONG&gt;&lt;EM&gt;Messages array&amp;lt;struct&amp;lt;Message:struct&amp;lt;&lt;/EM&gt;&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;Here's my test:&lt;/P&gt;&lt;PRE&gt;#My sample.xml file&lt;BR /&gt;&amp;lt;Status&amp;gt;
	&amp;lt;StatusCode&amp;gt;0&amp;lt;/StatusCode&amp;gt;
	&amp;lt;StatusDesc&amp;gt;Success&amp;lt;/StatusDesc&amp;gt;
	&amp;lt;ConfidenceIndex&amp;gt;D&amp;lt;/ConfidenceIndex&amp;gt;
	&amp;lt;Messages&amp;gt;
		&amp;lt;Message&amp;gt;
			&amp;lt;severity&amp;gt;&amp;lt;/severity&amp;gt; 
			&amp;lt;statuscode&amp;gt;0&amp;lt;/statuscode&amp;gt;
			&amp;lt;pagenumber&amp;gt;1&amp;lt;/pagenumber&amp;gt;
			&amp;lt;filename&amp;gt;Filxxxxe.pdf&amp;lt;/filename&amp;gt;
			&amp;lt;layoutfileid&amp;gt;xxxx&amp;lt;/layoutfileid&amp;gt;
			&amp;lt;layoutpageid&amp;gt;xxxxx&amp;lt;/layoutpageid&amp;gt;
			&amp;lt;layoutidentifertext&amp;gt;&amp;lt;/layoutidentifertext&amp;gt;
			&amp;lt;text&amp;gt;xx xxxxxx xx&amp;lt;/text&amp;gt;
		&amp;lt;/Message&amp;gt;
	&amp;lt;/Messages&amp;gt;
&amp;lt;/Status&amp;gt;&lt;BR /&gt;#Downloading and adding the jar from ibm.spss
wget &lt;A href="http://search.maven.org/remotecontent?filepath=com/ibm/spss/hive/serde2/xml/hivexmlserde/1.0.5.3/hivexmlserde-1.0.5.3.jar" target="_blank"&gt;http://search.maven.org/remotecontent?filepath=com/ibm/spss/hive/serde2/xml/hivexmlserde/1.0.5.3/hivexmlserde-1.0.5.3.jar&lt;/A&gt;
mv remotecontent?filepath=com%2Fibm%2Fspss%2Fhive%2Fserde2%2Fxml%2Fhivexmlserde%2F1.0.5.3%2Fhivexmlserde-1.0.5.3.jar hivexmlserde-1.0.5.3.jar
#Creating the HDFS location for the table and putting the sample.xml above&lt;BR /&gt;hdfs dfs -mkdir /user/hive/warehouse/hive-xml-test
hdfs dfs -put sample.xml /user/hive/warehouse/hive-xml-test

#Starting the test in Hive
hive&amp;gt; add jar /tmp/hivexmlserde-1.0.5.3.jar;
create external table xmltest( 
StatusCode string,
StatusDesc string,
ConfidenceIndex string,
Messages array&amp;lt;struct&amp;lt;Message:struct&amp;lt; 
severity:string,
statuscode:string,
pagenumber:string,
filename:string,
layoutfileid:string,
layoutpageid:string,
layoutidentifertext:string,
text:string&amp;gt;&amp;gt;&amp;gt; 
) 
row format serde 'com.ibm.spss.hive.serde2.xml.XmlSerDe' 
with serdeproperties ( 
"column.xpath.StatusCode" = "/Status/StatusCode/text()"
,"column.xpath.StatusDesc" = "/Status/StatusDesc/text()"
,"column.xpath.ConfidenceIndex" = "/Status/ConfidenceIndex/text()"
, "column.xpath.Messages" = "/Status/Messages/Message" ) 
stored as inputformat 'com.ibm.spss.hive.serde2.xml.XmlInputFormat' 
outputformat 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat' 
location '/user/hive/warehouse/hive-xml-test'
tblproperties ( "xmlinput.start" = "&amp;lt;Status&amp;gt;" ,"xmlinput.end" = "&amp;lt;/Status&amp;gt;" );
hive&amp;gt; select * from xmltest;
OK
0	Success	D	[{"message":{"severity":null,"statuscode":"0","pagenumber":"1","filename":"Filxxxxe.pdf","layoutfileid":"xxxx","layoutpageid":"xxxxx","layoutidentifertext":null,"text":"xx xxxxxx xx"}}]
Time taken: 0.104 seconds, Fetched: 1 row(s)
&lt;/PRE&gt;&lt;P&gt;Hope this helps!&lt;/P&gt;</description>
      <pubDate>Sat, 16 Jun 2018 06:04:01 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Getting-multiple-records-while-loading-nested-XML-data-into/m-p/222479#M184349</guid>
      <dc:creator>vmurakami</dc:creator>
      <dc:date>2018-06-16T06:04:01Z</dc:date>
    </item>
    <item>
      <title>Re: Getting multiple records while loading nested XML data into HIVE</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Getting-multiple-records-while-loading-nested-XML-data-into/m-p/222480#M184350</link>
      <description>&lt;P&gt;Thanks &lt;A rel="user" href="https://community.cloudera.com/users/79158/vmurakami.html" nodeid="79158"&gt;@Vinicius Higa Murakami&lt;/A&gt;. I was able to make correction and was able to achieve it.&lt;BR /&gt;&lt;BR /&gt;There is one more help I need . I am trying to load the complex structure of XML&amp;gt; i am able to load all the fields but there is one section which I am trying in many way to load the data but upon the loading of data, i still see the values to be NULL&amp;gt;&lt;BR /&gt;&lt;BR /&gt;Attached the files.&lt;/P&gt;&lt;P&gt;a) XPATH what i am writing b) the XML file I am loading &lt;/P&gt;&lt;P&gt;&lt;A href="https://community.cloudera.com/legacyfs/online/attachments/78479-xpath.txt"&gt;xpath.txt&lt;/A&gt; &lt;/P&gt;&lt;P&gt;&lt;A href="https://community.cloudera.com/legacyfs/online/attachments/78480-nestedxml.xml"&gt;nestedxml.xml&lt;/A&gt;&lt;/P&gt;&lt;P&gt;Output:&lt;/P&gt;&lt;P&gt;0       Success A       [{"message":{"severity":null,"statuscode":"0","pagenumber":"1","filename":"File.pdf","layoutfileid":"ACORD_125_2016_03","layoutpageid":"page1_COMMERCIAL_INSURANCE_APPLICATION","layoutidentifertext":null,"text":"Page Successfully Processed"}},{"message":{"severity":null,"statuscode":"0","pagenumber":"2","filename":"File.pdf","layoutfileid":"ACORD_125_2016_03","layoutpageid":"page2_CONTACT_INFORMATION","layoutidentifertext":null,"text":"Page Successfully Processed"}},{"message":{"severity":null,"statuscode":"0","pagenumber":"3","filename":"File.pdf","layoutfileid":"ACORD_125_2016_03","layoutpageid":"page3_GENERAL_INFORMATION","layoutidentifertext":null,"text":"Page Successfully Processed"}},{"message":{"severity":null,"statuscode":"0","pagenumber":"4","filename":"File.pdf","layoutfileid":"ACORD_125_2016_03","layoutpageid":"page4_PRIOR_CARRIER_INFORMATION","layoutidentifertext":null,"text":"Page Successfully Processed"}}]    {"transactionid":"80c32800-65cb-11e8-8f5a-005056a3433861521442784784","productversion":"2.06.3.00064","requestdate":"2018-06-01","processingtime":"4 sec","filesize":"464888 bytes","extractiontechnique":"Text"}      {"lobcd":"CPKGE","filesreceivedcnt":"1","pagesreceivedcnt":"4","pagesprocessedcnt":"4","formsprocessedcnt":"1","forms":[{"form":{"formname":"ACORD_125_2016_03","totalfields":"533","fieldswithdata":"82","ratebearingfields":"12","ratebearingfieldswithdata":"9"}}]}   NULL&lt;/P&gt;&lt;P&gt;Could you please help me with this&lt;/P&gt;&lt;P&gt;Thanks,&lt;/P&gt;&lt;P&gt;Harish&lt;/P&gt;</description>
      <pubDate>Tue, 19 Jun 2018 03:14:55 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Getting-multiple-records-while-loading-nested-XML-data-into/m-p/222480#M184350</guid>
      <dc:creator>anjiraju33</dc:creator>
      <dc:date>2018-06-19T03:14:55Z</dc:date>
    </item>
    <item>
      <title>Re: Getting multiple records while loading nested XML data into HIVE</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Getting-multiple-records-while-loading-nested-XML-data-into/m-p/222481#M184351</link>
      <description>&lt;P&gt;Hi &lt;A rel="user" href="https://community.cloudera.com/users/84172/anjiraju33.html" nodeid="84172"&gt;@Anji Raju&lt;/A&gt;! &lt;BR /&gt;Hmm, guess there's a little almost invisible mistake in your xpath &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt; &lt;BR /&gt;Try to change your ReturnPay&lt;STRONG&gt;L&lt;/STRONG&gt;oad&lt;/P&gt;&lt;PRE&gt; "column.xpath.ReturnPayLoad" = "/FormServerResponse/ReturnPaylLoad/ACORD/SignonRq"&lt;/PRE&gt;&lt;P&gt;to ReturnPay&lt;STRONG&gt;l&lt;/STRONG&gt;oad&lt;/P&gt;&lt;PRE&gt;"column.xpath.ReturnPayLoad" = "/FormServerResponse/ReturnPayload/ACORD/SignonRq"&lt;/PRE&gt;&lt;P&gt;Hope this helps!&lt;/P&gt;</description>
      <pubDate>Wed, 20 Jun 2018 10:00:35 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Getting-multiple-records-while-loading-nested-XML-data-into/m-p/222481#M184351</guid>
      <dc:creator>vmurakami</dc:creator>
      <dc:date>2018-06-20T10:00:35Z</dc:date>
    </item>
    <item>
      <title>Re: Getting multiple records while loading nested XML data into HIVE</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Getting-multiple-records-while-loading-nested-XML-data-into/m-p/222482#M184352</link>
      <description>&lt;P&gt;Thanks &lt;A rel="user" href="https://community.cloudera.com/users/79158/vmurakami.html" nodeid="79158"&gt;@Vinicius Higa Murakami&lt;/A&gt; , I was able to figure out the Typo Mistake. Thanks for the help&lt;/P&gt;</description>
      <pubDate>Wed, 20 Jun 2018 20:59:06 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Getting-multiple-records-while-loading-nested-XML-data-into/m-p/222482#M184352</guid>
      <dc:creator>anjiraju33</dc:creator>
      <dc:date>2018-06-20T20:59:06Z</dc:date>
    </item>
  </channel>
</rss>

