<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Small file in hadoop in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Small-file-in-hadoop/m-p/374199#M241903</link>
    <description>&lt;P&gt;Hi Team ,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;As we have more then 14 million small file as per Cloudera Navigator as below :&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;14.8M&amp;nbsp;Small files created in&amp;nbsp;&lt;BR /&gt;the last 30 days&amp;nbsp;&lt;BR /&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;14.8M / 21.4M&lt;BR /&gt;&amp;nbsp;69.3% small files&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;We are doing partition of data per day wise can we increase it or any other suggestion is there to overcome the small file problem.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank You in Advance&lt;/P&gt;&lt;DIV class="dashboard-groups-container"&gt;&lt;DIV class="flex-col-1"&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class="dashboard-groups-container"&gt;&lt;DIV class="flex-col-1"&gt;&lt;DIV&gt;&lt;DIV class="group-body"&gt;&lt;DIV class="dashboard-group-body-multiple-container with-title"&gt;&lt;DIV class="dashboard-group-body-multiple-container-item dashboard-activity-column"&gt;&amp;nbsp;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class="dashboard-groups-container"&gt;&lt;DIV&gt;&lt;DIV class="group-body"&gt;&lt;DIV class="dashboard-group-body-multiple-container flex-wrap"&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;</description>
    <pubDate>Thu, 20 Jul 2023 10:16:47 GMT</pubDate>
    <dc:creator>cdl-support</dc:creator>
    <dc:date>2023-07-20T10:16:47Z</dc:date>
    <item>
      <title>Small file in hadoop</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Small-file-in-hadoop/m-p/374199#M241903</link>
      <description>&lt;P&gt;Hi Team ,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;As we have more then 14 million small file as per Cloudera Navigator as below :&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;14.8M&amp;nbsp;Small files created in&amp;nbsp;&lt;BR /&gt;the last 30 days&amp;nbsp;&lt;BR /&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;14.8M / 21.4M&lt;BR /&gt;&amp;nbsp;69.3% small files&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;We are doing partition of data per day wise can we increase it or any other suggestion is there to overcome the small file problem.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank You in Advance&lt;/P&gt;&lt;DIV class="dashboard-groups-container"&gt;&lt;DIV class="flex-col-1"&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class="dashboard-groups-container"&gt;&lt;DIV class="flex-col-1"&gt;&lt;DIV&gt;&lt;DIV class="group-body"&gt;&lt;DIV class="dashboard-group-body-multiple-container with-title"&gt;&lt;DIV class="dashboard-group-body-multiple-container-item dashboard-activity-column"&gt;&amp;nbsp;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class="dashboard-groups-container"&gt;&lt;DIV&gt;&lt;DIV class="group-body"&gt;&lt;DIV class="dashboard-group-body-multiple-container flex-wrap"&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Thu, 20 Jul 2023 10:16:47 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Small-file-in-hadoop/m-p/374199#M241903</guid>
      <dc:creator>cdl-support</dc:creator>
      <dc:date>2023-07-20T10:16:47Z</dc:date>
    </item>
    <item>
      <title>Re: Small file in hadoop</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Small-file-in-hadoop/m-p/374247#M241915</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/30557"&gt;@cdl-support&lt;/a&gt;&amp;nbsp;Welcome to the Cloudera Community!&lt;BR /&gt;&lt;BR /&gt;To help you get the best possible solution, I have tagged our Atlas experts&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/23428"&gt;@BennyZ&lt;/a&gt;&amp;nbsp;and&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/67192"&gt;@mayank_gupta&lt;/a&gt;&amp;nbsp;&amp;nbsp;who may be able to assist you further.&lt;BR /&gt;&lt;BR /&gt;Please keep us updated on your post, and we hope you find a satisfactory solution to your query.&lt;/P&gt;</description>
      <pubDate>Thu, 20 Jul 2023 20:24:01 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Small-file-in-hadoop/m-p/374247#M241915</guid>
      <dc:creator>DianaTorres</dc:creator>
      <dc:date>2023-07-20T20:24:01Z</dc:date>
    </item>
    <item>
      <title>Re: Small file in hadoop</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Small-file-in-hadoop/m-p/374279#M241933</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/30557"&gt;@cdl-support&lt;/a&gt;&amp;nbsp;. You can refer to the below article and check if those help.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;A href="https://my.cloudera.com/knowledge/Issue-with-Small-Files-in-HDFS?id=308948" target="_blank"&gt;https://my.cloudera.com/knowledge/Issue-with-Small-Files-in-HDFS?id=308948&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Using Hive :&amp;nbsp;&lt;A href="https://docs.cloudera.com/best-practices/latest/impala-performance/topics/bp-impala-avoiding-small-files.html" target="_blank"&gt;https://docs.cloudera.com/best-practices/latest/impala-performance/topics/bp-impala-avoiding-small-files.html&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 21 Jul 2023 11:11:34 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Small-file-in-hadoop/m-p/374279#M241933</guid>
      <dc:creator>rki_</dc:creator>
      <dc:date>2023-07-21T11:11:34Z</dc:date>
    </item>
    <item>
      <title>Re: Small file in hadoop</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Small-file-in-hadoop/m-p/374470#M241992</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/30557"&gt;@cdl-support&lt;/a&gt;&amp;nbsp;Has the reply helped resolve your issue? If so, please mark the appropriate reply as the solution, as it will make it easier for others to find the answer in the future. Thanks.&lt;/P&gt;</description>
      <pubDate>Tue, 25 Jul 2023 20:02:11 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Small-file-in-hadoop/m-p/374470#M241992</guid>
      <dc:creator>DianaTorres</dc:creator>
      <dc:date>2023-07-25T20:02:11Z</dc:date>
    </item>
  </channel>
</rss>

