<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question spark job shuffle write super slow in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/spark-job-shuffle-write-super-slow/m-p/220400#M182285</link>
    <description>&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="76637-untitled2.png" style="width: 1096px;"&gt;&lt;img src="https://community.cloudera.com/t5/image/serverpage/image-id/15886iB716AC251285499A/image-size/medium?v=v2&amp;amp;px=400" role="button" title="76637-untitled2.png" alt="76637-untitled2.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="76646-untitled2.png" style="width: 1096px;"&gt;&lt;img src="https://community.cloudera.com/t5/image/serverpage/image-id/15887iD5BC8D822193D1F6/image-size/medium?v=v2&amp;amp;px=400" role="button" title="76646-untitled2.png" alt="76646-untitled2.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;Why is the Spark shuffle stage so slow for a 1.6 MB shuffle write and 2.4 MB input? Also, why is the shuffle write happening on only one executor? I am running a 3-node cluster with 8 cores each.&lt;/P&gt;&lt;P&gt;Please see my code and Spark UI pictures below.&lt;/P&gt;&lt;P&gt;Code:&lt;/P&gt;&lt;PRE&gt;&amp;lt;code&amp;gt;JavaPairRDD&amp;lt;String, String&amp;gt; javaPairRDD = c.mapToPair(new PairFunction&amp;lt;String, String, String&amp;gt;() {
    @Override
    public Tuple2&amp;lt;String, String&amp;gt; call(String arg0) throws Exception {
        // TODO Auto-generated method stub

        try {
            if (org.apache.commons.lang.StringUtils.isEmpty(arg0)) {
                return new Tuple2&amp;lt;String, String&amp;gt;("", "");
            }
            Tuple2&amp;lt;String, String&amp;gt; t = new Tuple2&amp;lt;String, String&amp;gt;(getESIndexName(arg0), arg0);
            return t;
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("******* exception in getESIndexName");
        }
        return new Tuple2&amp;lt;String, String&amp;gt;("", "");
    }
});

java.util.Map&amp;lt;String, Iterable&amp;lt;String&amp;gt;&amp;gt; map1 = javaPairRDD.groupByKey().collectAsMap();* &lt;/PRE&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="76636-untitled1.png" style="width: 1560px;"&gt;&lt;img src="https://community.cloudera.com/t5/image/serverpage/image-id/15888iCE03E57EF3577E96/image-size/medium?v=v2&amp;amp;px=400" role="button" title="76636-untitled1.png" alt="76636-untitled1.png" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
    <pubDate>Sun, 18 Aug 2019 02:28:41 GMT</pubDate>
    <dc:creator>pradeepbill</dc:creator>
    <dc:date>2019-08-18T02:28:41Z</dc:date>
  </channel>
</rss>

