<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: puthdfs is writing slow in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/puthdfs-is-writing-slow/m-p/220081#M66657</link>
    <description>&lt;A rel="user" href="https://community.cloudera.com/users/23208/hadoopuserhadoop.html" nodeid="23208"&gt;@Hadoop User&lt;/A&gt;&lt;P&gt;If merging FlowFiles and adding more concurrent tasks to your putHDFS processor help with your performance issue here, please take a moment to click "accept" on the above answer to close out this thread.&lt;/P&gt;&lt;P&gt;Thank you,&lt;/P&gt;&lt;P&gt;Matt&lt;/P&gt;</description>
    <pubDate>Fri, 13 Oct 2017 23:01:16 GMT</pubDate>
    <dc:creator>MattWho</dc:creator>
    <dc:date>2017-10-13T23:01:16Z</dc:date>
    <item>
      <title>puthdfs is writing slow</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/puthdfs-is-writing-slow/m-p/220076#M66652</link>
      <description>&lt;P&gt;I am getting data using GetHDFS, doing some processing, and writing it back to HDFS.&lt;/P&gt;&lt;P&gt;Up until PutHDFS the data is processed quickly, but PutHDFS writes the data to HDFS slowly.&lt;/P&gt;&lt;P&gt;Could you please let me know how to improve the speed?&lt;/P&gt;</description>
      <pubDate>Tue, 15 Aug 2017 20:25:56 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/puthdfs-is-writing-slow/m-p/220076#M66652</guid>
      <dc:creator>mark_hadoop</dc:creator>
      <dc:date>2017-08-15T20:25:56Z</dc:date>
    </item>
    <item>
      <title>Re: puthdfs is writing slow</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/puthdfs-is-writing-slow/m-p/220077#M66653</link>
      <description>&lt;A rel="user" href="https://community.cloudera.com/users/23208/hadoopuserhadoop.html" nodeid="23208"&gt;@Hadoop User&lt;/A&gt;&lt;P&gt;Please share your PutHDFS processor configuration with us.&lt;/P&gt;&lt;P&gt;How large are the individual files that are being written to HDFS?&lt;/P&gt;&lt;P&gt;Thanks,&lt;/P&gt;&lt;P&gt;Matt&lt;/P&gt;</description>
      <pubDate>Tue, 15 Aug 2017 20:57:35 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/puthdfs-is-writing-slow/m-p/220077#M66653</guid>
      <dc:creator>MattWho</dc:creator>
      <dc:date>2017-08-15T20:57:35Z</dc:date>
    </item>
    <item>
      <title>Re: puthdfs is writing slow</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/puthdfs-is-writing-slow/m-p/220078#M66654</link>
      <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/525/mclark.html" nodeid="525" target="_blank"&gt;@Matt Clarke&lt;/A&gt;&lt;/P&gt;&lt;P&gt;Each file is only 1-2 KB.&lt;/P&gt;&lt;P&gt;The configuration is:&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="28383-puthdfs-config.jpg" style="width: 683px;"&gt;&lt;img src="https://community.cloudera.com/t5/image/serverpage/image-id/15914i953DC2E1FF186ACF/image-size/medium?v=v2&amp;amp;px=400" role="button" title="28383-puthdfs-config.jpg" alt="28383-puthdfs-config.jpg" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;Concurrent tasks: 1; the rest are unchanged.&lt;/P&gt;&lt;P&gt;Thank you&lt;/P&gt;</description>
      <pubDate>Sun, 18 Aug 2019 02:31:45 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/puthdfs-is-writing-slow/m-p/220078#M66654</guid>
      <dc:creator>mark_hadoop</dc:creator>
      <dc:date>2019-08-18T02:31:45Z</dc:date>
    </item>
    <item>
      <title>Re: puthdfs is writing slow</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/puthdfs-is-writing-slow/m-p/220079#M66655</link>
      <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/23208/hadoopuserhadoop.html" nodeid="23208"&gt;@Hadoop User&lt;/A&gt; &lt;/P&gt;&lt;P&gt;It is unlikely you will see the same performance out of Hadoop between reads and writes.  The Hadoop architecture is designed to favor many readers and few writers.&lt;/P&gt;&lt;P&gt;Increasing the number of concurrent tasks may help performance, since you will then have multiple files being written concurrently.&lt;/P&gt;&lt;P&gt;1 - 2 KB files are very small and do not make optimal use of your Hadoop architecture.  Commonly, NiFi is used to merge bundles of files together to a more optimal size for storage in Hadoop.   I believe 64 KB is the default optimal size.  &lt;/P&gt;&lt;P&gt;You can remove some of the overhead of each connection by merging files together into larger files using the MergeContent processor before writing to Hadoop.&lt;/P&gt;&lt;P&gt;Thanks,&lt;/P&gt;&lt;P&gt;Matt&lt;/P&gt;</description>
      <pubDate>Wed, 16 Aug 2017 01:14:49 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/puthdfs-is-writing-slow/m-p/220079#M66655</guid>
      <dc:creator>MattWho</dc:creator>
      <dc:date>2017-08-16T01:14:49Z</dc:date>
    </item>
    <item>
      <title>Re: puthdfs is writing slow</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/puthdfs-is-writing-slow/m-p/220080#M66656</link>
      <description>&lt;P&gt;&lt;/P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/525/mclark.html" nodeid="525"&gt;@Matt Clarke&lt;/A&gt;&lt;P&gt;Thank you.&lt;/P&gt;&lt;P&gt;I have merged files depending on the frequency of writes to 64 KB.&lt;/P&gt;&lt;P&gt;Sorry for the late reply.&lt;/P&gt;</description>
      <pubDate>Fri, 13 Oct 2017 17:34:11 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/puthdfs-is-writing-slow/m-p/220080#M66656</guid>
      <dc:creator>mark_hadoop</dc:creator>
      <dc:date>2017-10-13T17:34:11Z</dc:date>
    </item>
    <item>
      <title>Re: puthdfs is writing slow</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/puthdfs-is-writing-slow/m-p/220081#M66657</link>
      <description>&lt;A rel="user" href="https://community.cloudera.com/users/23208/hadoopuserhadoop.html" nodeid="23208"&gt;@Hadoop User&lt;/A&gt;&lt;P&gt;If merging FlowFiles and adding more concurrent tasks to your putHDFS processor help with your performance issue here, please take a moment to click "accept" on the above answer to close out this thread.&lt;/P&gt;&lt;P&gt;Thank you,&lt;/P&gt;&lt;P&gt;Matt&lt;/P&gt;</description>
      <pubDate>Fri, 13 Oct 2017 23:01:16 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/puthdfs-is-writing-slow/m-p/220081#M66657</guid>
      <dc:creator>MattWho</dc:creator>
      <dc:date>2017-10-13T23:01:16Z</dc:date>
    </item>
  </channel>
</rss>

