<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Nifi SplitText split file with 2 records to 10000 and more files in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Nifi-SplitText-split-file-with-2-records-to-10000-and-more/m-p/395558#M248974</link>
    <description>&lt;P&gt;My&amp;nbsp;&lt;SPAN&gt;SplitFile processor is connected to (followed by) the ValidateRecords processor.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;ValidateRecords uses CSVReader with the following configuration:&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="M60Larmp.png" style="width: 999px;"&gt;&lt;img src="https://community.cloudera.com/t5/image/serverpage/image-id/42249i4FA28B9CDE7214D8/image-size/large?v=v2&amp;amp;px=999" role="button" title="M60Larmp.png" alt="M60Larmp.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;Input_schema is avro:&lt;/P&gt;&lt;PRE&gt;{
 "type": "record",
 "name": "geo_data",
 "fields": [
    {
      "name": "id",
      "type": [
         "int",
         "null"
       ]
    },
    {
      "name": "description",
        "type": [
         "string",
         "null"
       ]
    }
   ]
  }&lt;/PRE&gt;&lt;P&gt;And the source of this pipeline is the ListS3 and FetchS3 processors.&lt;/P&gt;</description>
    <pubDate>Sat, 19 Oct 2024 19:42:05 GMT</pubDate>
    <dc:creator>AndreyDE</dc:creator>
    <dc:date>2024-10-19T19:42:05Z</dc:date>
    <item>
      <title>Nifi SplitText split file with 2 records to 10000 and more files</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Nifi-SplitText-split-file-with-2-records-to-10000-and-more/m-p/395501#M248956</link>
      <description>&lt;P&gt;&amp;nbsp; &amp;nbsp;Hello!&lt;/P&gt;&lt;P&gt;&amp;nbsp; The configuration of my SplitText is:&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Снимок экрана 2024-10-18 в 15.00.11.png" style="width: 999px;"&gt;&lt;img src="https://community.cloudera.com/t5/image/serverpage/image-id/42232i28365711FE727C41/image-size/large?v=v2&amp;amp;px=999" role="button" title="Снимок экрана 2024-10-18 в 15.00.11.png" alt="Снимок экрана 2024-10-18 в 15.00.11.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;The task is to split one csv file:&lt;/P&gt;&lt;PRE&gt;&amp;nbsp;  id;description
   "1234";"The latitude is 12324.24"
   "2345";"12324.24 this value"&lt;/PRE&gt;&lt;P&gt;into 2 files:&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;   id;description
   "1234";"The latitude is 12324.24"&lt;/PRE&gt;&lt;P&gt;and&lt;/P&gt;&lt;PRE&gt;   id;description
   "2345";"12324.24 this value"&lt;/PRE&gt;&lt;P&gt;But it returns 10000 and more duplicated files!&lt;/P&gt;&lt;P&gt;What am I doing wrong?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 18 Oct 2024 12:37:46 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Nifi-SplitText-split-file-with-2-records-to-10000-and-more/m-p/395501#M248956</guid>
      <dc:creator>AndreyDE</dc:creator>
      <dc:date>2024-10-18T12:37:46Z</dc:date>
    </item>
    <item>
      <title>Re: Nifi SplitText split file with 2 records to 10000 and more files</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Nifi-SplitText-split-file-with-2-records-to-10000-and-more/m-p/395528#M248966</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/119091"&gt;@AndreyDE&lt;/a&gt;&amp;nbsp;,&lt;BR /&gt;&lt;BR /&gt;What's your input into the SplitFile processor?&lt;/P&gt;&lt;P&gt;I used your example and am getting a valid output -&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="drewski7_0-1729273341413.png" style="width: 400px;"&gt;&lt;img src="https://community.cloudera.com/t5/image/serverpage/image-id/42239iB4BE4C720718D748/image-size/medium?v=v2&amp;amp;px=400" role="button" title="drewski7_0-1729273341413.png" alt="drewski7_0-1729273341413.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;Make sure the file going into the SplitText is not re-reading the same file over and over again and also if you are using generateFlowFile make sure the scheduling isn't set to 0 sec because it will keep outputting a bunch of flowfiles.&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;Please accept this solution if it's correct, thanks!&lt;/P&gt;</description>
      <pubDate>Fri, 18 Oct 2024 17:43:34 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Nifi-SplitText-split-file-with-2-records-to-10000-and-more/m-p/395528#M248966</guid>
      <dc:creator>drewski7</dc:creator>
      <dc:date>2024-10-18T17:43:34Z</dc:date>
    </item>
    <item>
      <title>Re: Nifi SplitText split file with 2 records to 10000 and more files</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Nifi-SplitText-split-file-with-2-records-to-10000-and-more/m-p/395558#M248974</link>
      <description>&lt;P&gt;My&amp;nbsp;&lt;SPAN&gt;SplitFile processor is connected to (followed by) the ValidateRecords processor.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;ValidateRecords uses CSVReader with the following configuration:&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="M60Larmp.png" style="width: 999px;"&gt;&lt;img src="https://community.cloudera.com/t5/image/serverpage/image-id/42249i4FA28B9CDE7214D8/image-size/large?v=v2&amp;amp;px=999" role="button" title="M60Larmp.png" alt="M60Larmp.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;Input_schema is avro:&lt;/P&gt;&lt;PRE&gt;{
 "type": "record",
 "name": "geo_data",
 "fields": [
    {
      "name": "id",
      "type": [
         "int",
         "null"
       ]
    },
    {
      "name": "description",
        "type": [
         "string",
         "null"
       ]
    }
   ]
  }&lt;/PRE&gt;&lt;P&gt;And the source of this pipeline is the ListS3 and FetchS3 processors.&lt;/P&gt;</description>
      <pubDate>Sat, 19 Oct 2024 19:42:05 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Nifi-SplitText-split-file-with-2-records-to-10000-and-more/m-p/395558#M248974</guid>
      <dc:creator>AndreyDE</dc:creator>
      <dc:date>2024-10-19T19:42:05Z</dc:date>
    </item>
    <item>
      <title>Re: Nifi SplitText split file with 2 records to 10000 and more files</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Nifi-SplitText-split-file-with-2-records-to-10000-and-more/m-p/395562#M248975</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/119091"&gt;@AndreyDE&lt;/a&gt;&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;Is one flowfile going into the SplitText processor and outputting 10000 flowfiles?&lt;/P&gt;&lt;P&gt;How big is the flowfile going into the SplitText processor?&lt;BR /&gt;&lt;BR /&gt;Or is the source of the pipeline recursively getting all objects in your S3 bucket?&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;I need to know a little bit more about the input going into SplitText.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 20 Oct 2024 01:46:59 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Nifi-SplitText-split-file-with-2-records-to-10000-and-more/m-p/395562#M248975</guid>
      <dc:creator>drewski7</dc:creator>
      <dc:date>2024-10-20T01:46:59Z</dc:date>
    </item>
    <item>
      <title>Re: Nifi SplitText split file with 2 records to 10000 and more files</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Nifi-SplitText-split-file-with-2-records-to-10000-and-more/m-p/395564#M248976</link>
      <description>&lt;P&gt;Hi &lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/119091"&gt;@AndreyDE&lt;/a&gt; ,&lt;/P&gt;&lt;P&gt;The reason you are getting that many flowfiles is because you are continuously running the upstream processor that gets the CSV input on&amp;nbsp; 0 Secs Schedule . You seem to be new to Nifi and it's a typical beginner mistake. We all have been there :). By default the scheduling on every processor is set to 0 secs in earlier versions, but in later releases to help avoid getting this issue the default has changed to 1 min.&amp;nbsp; To fix this, if you are doing testing , I would stop the processor that generates\gets the CSV input and whenever you want to run a test you can right click and select "&lt;STRONG&gt;Run Once&lt;/STRONG&gt;". If you are planning to run the flow as a batch process where every time you are expecting to get a different file, then go to the processor configuration , under the Scheduling tab you can adjust the schedule accordingly by selecting either "&lt;STRONG&gt;Timer Schedule&lt;/STRONG&gt;" or "&lt;STRONG&gt;Cron Schedule&lt;/STRONG&gt;". For more info on scheduling please refer to the following:&lt;/P&gt;&lt;P&gt;&lt;A href="https://nifi.apache.org/docs/nifi-docs/html/user-guide.html#scheduling-tab" target="_blank"&gt;https://nifi.apache.org/docs/nifi-docs/html/user-guide.html#scheduling-tab&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;A href="https://www.youtube.com/watch?v=pZq0EbfDBy4" target="_blank"&gt;https://www.youtube.com/watch?v=pZq0EbfDBy4&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Hope that helps.&lt;/P&gt;</description>
      <pubDate>Sun, 20 Oct 2024 02:49:35 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Nifi-SplitText-split-file-with-2-records-to-10000-and-more/m-p/395564#M248976</guid>
      <dc:creator>SAMSAL</dc:creator>
      <dc:date>2024-10-20T02:49:35Z</dc:date>
    </item>
    <item>
      <title>Re: Nifi SplitText split file with 2 records to 10000 and more files</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Nifi-SplitText-split-file-with-2-records-to-10000-and-more/m-p/395566#M248977</link>
      <description>&lt;P&gt;&amp;nbsp;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;&lt;BLOCKQUOTE&gt;&lt;HR /&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/79092"&gt;@drewski7&lt;/a&gt;&amp;nbsp;wrote:&lt;BR /&gt;&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/119091"&gt;@AndreyDE&lt;/a&gt;&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;Is one flowfile going into the SplitText processor and outputting 10000 flowfiles?&lt;/P&gt;&lt;HR /&gt;&lt;/BLOCKQUOTE&gt;&lt;P&gt;Yes - one flow file&lt;/P&gt;&lt;BLOCKQUOTE&gt;&lt;HR /&gt;&lt;P&gt;How big is the flowfile going into the SplitText processor?&lt;/P&gt;&lt;HR /&gt;&lt;/BLOCKQUOTE&gt;&lt;P&gt;About 30 KB&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;BLOCKQUOTE&gt;&lt;HR /&gt;Or is the source of the pipeline recursively getting all objects in your S3 bucket?&amp;nbsp;&lt;BR /&gt;&lt;HR /&gt;&lt;/BLOCKQUOTE&gt;&lt;P&gt;Yes, it searches all objects recursively&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 20 Oct 2024 08:45:12 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Nifi-SplitText-split-file-with-2-records-to-10000-and-more/m-p/395566#M248977</guid>
      <dc:creator>AndreyDE</dc:creator>
      <dc:date>2024-10-20T08:45:12Z</dc:date>
    </item>
  </channel>
</rss>

