<?xml version="1.0" encoding="UTF-8"?>
<!--
  RSS 2.0 feed holding a single channel that describes one forum reply.
  The reply's title, link, body and publication metadata sit directly on
  <channel>; this document contains no <item> elements.
-->
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/"
     xmlns:dc="http://purl.org/dc/elements/1.1/"
     xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
     xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/"
     version="2.0">
  <!-- Only the dc prefix is used below; content, rdf and taxo are declared but unused in this document. -->
  <channel>
    <title>question Re: How to split the dataframe of multiple files into multiple smaller dataframes in Spark? in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/How-to-split-the-dataframe-of-multiple-files-into-multiple/m-p/63161#M22479</link>
    <!-- Body is entity-escaped HTML; whitespace inside it is data, so keep it on one line and do not re-indent. -->
    <description>&lt;P&gt;You can make a DataFrame over all the files and then filter&amp;nbsp;out the lines you don't want.&lt;/P&gt;&lt;P&gt;You can make a DataFrame for just the files you want, then union them together.&lt;/P&gt;&lt;P&gt;Both are viable.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;If you're saying different data types are mixed into sections of each file, that's harder, as you need to use something like mapPartitions to carefully process each file 3 times.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <!-- pubDate and dc:date carry the same instant in two formats (RFC 822 style and ISO 8601). -->
    <pubDate>Fri, 29 Dec 2017 13:29:19 GMT</pubDate>
    <dc:creator>srowen</dc:creator>
    <dc:date>2017-12-29T13:29:19Z</dc:date>
  </channel>
</rss>

