<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Converting Large CSV into JSON in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Converting-Large-CSV-into-JSON/m-p/242775#M204574</link>
    <description>&lt;P&gt;I have a relatively large CSV (~80GB) that I need to transform into multiple JSON documents/records. I'm using a ConvertRecord processor with a CSVReader and AvroRecordSetWriter, and that's where my CSV gets stuck. What's the best approach: break up the CSV prior to converting it, or try to get more horsepower on the server?&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;Server Mem: 16GB&lt;/LI&gt;&lt;LI&gt;Cores: 4&lt;/LI&gt;&lt;LI&gt;Maximum Timer Driven Thread Count: 16&lt;/LI&gt;&lt;LI&gt;Java Min/Max Heap: 2GB/10GB&lt;/LI&gt;&lt;/UL&gt;</description>
    <pubDate>Tue, 18 Jun 2019 19:05:48 GMT</pubDate>
    <dc:creator>millerbill3</dc:creator>
    <dc:date>2019-06-18T19:05:48Z</dc:date>
    <item>
      <title>Converting Large CSV into JSON</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Converting-Large-CSV-into-JSON/m-p/242775#M204574</link>
      <description>&lt;P&gt;I have a relatively large CSV (~80GB) that I need to transform into multiple JSON documents/records. I'm using a ConvertRecord processor with a CSVReader and AvroRecordSetWriter, and that's where my CSV gets stuck. What's the best approach: break up the CSV prior to converting it, or try to get more horsepower on the server?&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;Server Mem: 16GB&lt;/LI&gt;&lt;LI&gt;Cores: 4&lt;/LI&gt;&lt;LI&gt;Maximum Timer Driven Thread Count: 16&lt;/LI&gt;&lt;LI&gt;Java Min/Max Heap: 2GB/10GB&lt;/LI&gt;&lt;/UL&gt;</description>
      <pubDate>Tue, 18 Jun 2019 19:05:48 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Converting-Large-CSV-into-JSON/m-p/242775#M204574</guid>
      <dc:creator>millerbill3</dc:creator>
      <dc:date>2019-06-18T19:05:48Z</dc:date>
    </item>
    <item>
      <title>Re: Converting Large CSV into JSON</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Converting-Large-CSV-into-JSON/m-p/242776#M204575</link>
      <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/114774/millerbill3.html"&gt;@Bill Miller&lt;/A&gt;&lt;/P&gt;&lt;P&gt;Try using a series of &lt;A rel="noopener noreferrer" href="https://nifi.apache.org/docs/nifi-docs/components/org.apache.nifi/nifi-standard-nar/1.5.0/org.apache.nifi.processors.standard.SplitRecord/index.html" target="_blank"&gt;SplitRecord&lt;/A&gt; processors to split the file into smaller chunks.&lt;/P&gt;&lt;P&gt;Follow an approach similar to the one described in &lt;A rel="noopener noreferrer" href="https://community.hortonworks.com/questions/122858/nifi-splittext-big-file.html" target="_blank"&gt;this&lt;/A&gt; thread and see whether it improves performance.&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 19 Jun 2019 08:21:33 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Converting-Large-CSV-into-JSON/m-p/242776#M204575</guid>
      <dc:creator>Shu_ashu</dc:creator>
      <dc:date>2019-06-19T08:21:33Z</dc:date>
    </item>
  </channel>
</rss>

