<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Number of ORC files effect on NameNode? in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Number-of-ORC-files-effect-on-namnode/m-p/129971#M43472</link>
    <description>&lt;P&gt;&lt;A href="https://community.hortonworks.com/questions/3037/what-size-of-tables-make-the-best-out-of-orc-forma.html" target="_blank"&gt;https://community.hortonworks.com/questions/3037/what-size-of-tables-make-the-best-out-of-orc-forma.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;Large ORC files with large stripes should give the best performance.&lt;/P&gt;&lt;P&gt;Look at this Yahoo article on Hive and ORC at scale:&lt;/P&gt;&lt;P&gt;&lt;A href="http://www.slideshare.net/Hadoop_Summit/hive-at-yahoo-letters-from-the-trenches" target="_blank"&gt;http://www.slideshare.net/Hadoop_Summit/hive-at-yahoo-letters-from-the-trenches&lt;/A&gt;&lt;/P&gt;</description>
    <pubDate>Tue, 25 Oct 2016 04:04:13 GMT</pubDate>
    <dc:creator>TimothySpann</dc:creator>
    <dc:date>2016-10-25T04:04:13Z</dc:date>
    <item>
      <title>Number of ORC files effect on NameNode?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Number-of-ORC-files-effect-on-namnode/m-p/129969#M43470</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;For very large datasets in the PB range, does it help to create large ORC files?&lt;/P&gt;&lt;P&gt;I understand they should be greater than the block size.&lt;/P&gt;&lt;P&gt;So let's say I have a block size of 256 MB and am creating 1 GB ORC files for a Hive table with a total size of 3 TB.&lt;/P&gt;&lt;P&gt;So would it help to create bigger files, say of 2 GB each?&lt;/P&gt;&lt;P&gt;Keep in mind I will be using the ORC index to query only 1 file per partition, and the data output would be in the KB range.&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;</description>
      <pubDate>Fri, 14 Oct 2016 01:35:58 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Number-of-ORC-files-effect-on-namnode/m-p/129969#M43470</guid>
      <dc:creator>rbiswas1</dc:creator>
      <dc:date>2016-10-14T01:35:58Z</dc:date>
    </item>
    <item>
      <title>Re: Number of ORC files effect on NameNode?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Number-of-ORC-files-effect-on-namnode/m-p/129970#M43471</link>
      <description>&lt;P&gt;
	As a general rule, you should be creating the largest files you can within a partition.&lt;/P&gt;&lt;P&gt;
	Check out &lt;A rel="user" href="https://community.cloudera.com/users/175/dstreever.html" nodeid="175"&gt;@David Streever&lt;/A&gt;'s excellent answer to &lt;A href="https://community.hortonworks.com/questions/4024/how-many-files-is-too-many-on-a-modern-hdp-cluster.html"&gt;this question&lt;/A&gt; for more details.  &lt;/P&gt;</description>
      <pubDate>Tue, 25 Oct 2016 03:20:10 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Number-of-ORC-files-effect-on-namnode/m-p/129970#M43471</guid>
      <dc:creator>twilson</dc:creator>
      <dc:date>2016-10-25T03:20:10Z</dc:date>
    </item>
    <item>
      <title>Re: Number of ORC files effect on NameNode?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Number-of-ORC-files-effect-on-namnode/m-p/129971#M43472</link>
      <description>&lt;P&gt;&lt;A href="https://community.hortonworks.com/questions/3037/what-size-of-tables-make-the-best-out-of-orc-forma.html" target="_blank"&gt;https://community.hortonworks.com/questions/3037/what-size-of-tables-make-the-best-out-of-orc-forma.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;Large ORC files with large stripes should give the best performance.&lt;/P&gt;&lt;P&gt;Look at this Yahoo article on Hive and ORC at scale:&lt;/P&gt;&lt;P&gt;&lt;A href="http://www.slideshare.net/Hadoop_Summit/hive-at-yahoo-letters-from-the-trenches" target="_blank"&gt;http://www.slideshare.net/Hadoop_Summit/hive-at-yahoo-letters-from-the-trenches&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 25 Oct 2016 04:04:13 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Number-of-ORC-files-effect-on-namnode/m-p/129971#M43472</guid>
      <dc:creator>TimothySpann</dc:creator>
      <dc:date>2016-10-25T04:04:13Z</dc:date>
    </item>
  </channel>
</rss>

