<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Which types of files can we load into HDP data platform? in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Which-types-of-files-can-we-load-into-HDP-data-platform/m-p/106989#M21689</link>
    <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/3172/aryib.html" nodeid="3172"&gt;@Abdi Ismail&lt;/A&gt;
&lt;/P&gt;&lt;P&gt;Hadoop is a schema-on-read, generic, multi-purpose framework. You can ingest any type of file; you then provide instructions to the tools that access your files, such as Hive, Pig, MapReduce and Spark. Out of the box, you can read CSV, JSON, Avro and XML. To clarify: with Hive, for example, you can provide a "SerDe" (which stands for serializer/deserializer), think of it as a translator for your file type, and then read your files. For HTML, you can use a library like jsoup to read those files and parse them with the tools I mentioned above.&lt;/P&gt;</description>
    <pubDate>Thu, 03 Mar 2016 03:45:48 GMT</pubDate>
    <dc:creator>aervits</dc:creator>
    <dc:date>2016-03-03T03:45:48Z</dc:date>
    <item>
      <title>Which types of files can we load into HDP data platform?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Which-types-of-files-can-we-load-into-HDP-data-platform/m-p/106988#M21688</link>
      <description>&lt;P&gt;I noticed that the tutorials use files ending in '.csv', so I was wondering whether other data file formats, such as 'html', 'xml' and 'xls', are accepted, and what other formats are accepted?&lt;/P&gt;</description>
      <pubDate>Thu, 03 Mar 2016 03:43:47 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Which-types-of-files-can-we-load-into-HDP-data-platform/m-p/106988#M21688</guid>
      <dc:creator>aryib</dc:creator>
      <dc:date>2016-03-03T03:43:47Z</dc:date>
    </item>
    <item>
      <title>Re: Which types of files can we load into HDP data platform?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Which-types-of-files-can-we-load-into-HDP-data-platform/m-p/106989#M21689</link>
      <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/3172/aryib.html" nodeid="3172"&gt;@Abdi Ismail&lt;/A&gt;
&lt;/P&gt;&lt;P&gt;Hadoop is a schema-on-read, generic, multi-purpose framework. You can ingest any type of file; you then provide instructions to the tools that access your files, such as Hive, Pig, MapReduce and Spark. Out of the box, you can read CSV, JSON, Avro and XML. To clarify: with Hive, for example, you can provide a "SerDe" (which stands for serializer/deserializer), think of it as a translator for your file type, and then read your files. For HTML, you can use a library like jsoup to read those files and parse them with the tools I mentioned above.&lt;/P&gt;</description>
      <pubDate>Thu, 03 Mar 2016 03:45:48 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Which-types-of-files-can-we-load-into-HDP-data-platform/m-p/106989#M21689</guid>
      <dc:creator>aervits</dc:creator>
      <dc:date>2016-03-03T03:45:48Z</dc:date>
    </item>
    <item>
      <title>Re: Which types of files can we load into HDP data platform?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Which-types-of-files-can-we-load-into-HDP-data-platform/m-p/106990#M21690</link>
      <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/3172/aryib.html" nodeid="3172"&gt;@Abdi Ismail&lt;/A&gt; some examples here &lt;A href="https://community.hortonworks.com/questions/15422/hive-and-avro-schema-defined-in-tblproperties-vs-s.html"&gt;https://community.hortonworks.com/questions/15422/hive-and-avro-schema-defined-in-tblproperties-vs-s.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;A href="https://community.hortonworks.com/content/kbentry/972/hive-and-xml-pasring.html"&gt;https://community.hortonworks.com/content/kbentry/972/hive-and-xml-pasring.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;A href="https://community.hortonworks.com/questions/4345/querying-json-data-using-hive.html"&gt;https://community.hortonworks.com/questions/4345/querying-json-data-using-hive.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;A href="https://community.hortonworks.com/questions/18792/pig-and-json.html"&gt;https://community.hortonworks.com/questions/18792/pig-and-json.html&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 03 Mar 2016 03:49:52 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Which-types-of-files-can-we-load-into-HDP-data-platform/m-p/106990#M21690</guid>
      <dc:creator>aervits</dc:creator>
      <dc:date>2016-03-03T03:49:52Z</dc:date>
    </item>
  </channel>
</rss>

