<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Spark driver memory keeps growing in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Spark-driver-memory-keeps-growing/m-p/165012#M127379</link>
    <description>&lt;P&gt;Hi Pierre,&lt;/P&gt;&lt;P&gt;We would need to look at the code.&lt;/P&gt;&lt;P&gt;Can you do a persist just before stage 63, and before stage 65 check the Spark UI storage tab and executor tab for data skew. If there is data skew, you will need to add a salt key to your key. &lt;/P&gt;&lt;P&gt;You could also look at creating a dataframe from the RDD rdd.toDF() and apply UDF on it. DFs manage memory more efficiently.&lt;/P&gt;&lt;P&gt;Best,&lt;/P&gt;&lt;P&gt;Amit&lt;/P&gt;</description>
    <pubDate>Tue, 09 Aug 2016 14:35:26 GMT</pubDate>
    <dc:creator>anandi</dc:creator>
    <dc:date>2016-08-09T14:35:26Z</dc:date>
  </channel>
</rss>

