Measuring Spark job performance (Support Questions)
https://community.cloudera.com/t5/Support-Questions/Measuring-Spark-job-performance/m-p/307178#M223142
Posted by TimmehG on Tue, 08 Dec 2020 17:00:26 GMT
I have a Spark job where I am tuning the code and settings to improve performance. However, I'm not sure how to track performance when running on a shared cluster, so I can't tell whether my changes are making things better or worse.

I've considered the following metrics, though even when running an identical job twice each of them can give very different values, so none of them seems a reliable way to track the job's performance (a rough sketch of collecting them programmatically follows below):

- Aggregated resource usage (CPU and memory)
- Task time (active and total)
- Job run time

Ideally I'd like to be able to track the performance of the job over time as the codebase evolves.
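One way such per-run numbers could be collected is with a SparkListener that sums task-level metrics, so each run produces a single set of figures to log next to a commit hash. This is only a sketch, assuming a Scala Spark job; the RunMetricsListener name is illustrative, not an existing class.

import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}
import java.util.concurrent.atomic.AtomicLong

// Aggregates task-level metrics for one run so they can be logged alongside
// a commit hash and compared across runs. Listener callbacks can fire from
// multiple threads, hence the atomic counters.
class RunMetricsListener extends SparkListener {
  val executorRunTimeMs = new AtomicLong(0L) // wall-clock time tasks spent running
  val executorCpuTimeNs = new AtomicLong(0L) // CPU time tasks actually consumed
  val shuffleReadBytes  = new AtomicLong(0L) // bytes pulled across the shuffle

  override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = {
    val m = taskEnd.taskMetrics
    if (m != null) { // metrics can be absent for failed tasks
      executorRunTimeMs.addAndGet(m.executorRunTime)
      executorCpuTimeNs.addAndGet(m.executorCpuTime)
      shuffleReadBytes.addAndGet(m.shuffleReadMetrics.totalBytesRead)
    }
  }
}

// Register before the job runs, read the counters once it finishes:
//   val listener = new RunMetricsListener
//   spark.sparkContext.addSparkListener(listener)
//   ... run the job ...
//   println(s"cpuTimeSec=${listener.executorCpuTimeNs.get / 1e9} " +
//           s"runTimeSec=${listener.executorRunTimeMs.get / 1e3}")

Of the three, executor CPU time should be the least sensitive to contention on a shared cluster, since it excludes time tasks spend waiting on I/O or the scheduler, unlike wall-clock job run time.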

