<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: executorLostFailure (executor 6 exited unrel+details ExecutorLostFailure (executor 6 exited unrelated to the running tasks) Reason: Container marked as failed: container_1677979357691_0001 on host: xyz Exit status: -100. Diagnostics: Container released on a *lost* node. in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/How-to-correct-executorLostFailure-Reason-Container-marked/m-p/365373#M239336</link>
    <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/103828"&gt;@zintan&lt;/a&gt;,&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Welcome to our community! To help you get the best possible answer, I have tagged our Spark experts&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/78612"&gt;@RangaReddy&lt;/a&gt;&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/95503"&gt;@steven-matison&lt;/a&gt;&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/40384"&gt;@smdas&lt;/a&gt;&amp;nbsp; who may be able to assist you further.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Please feel free to provide any additional information or details about your query, and we hope that you will find a satisfactory solution to your question.&lt;/P&gt;</description>
    <pubDate>Mon, 06 Mar 2023 08:01:25 GMT</pubDate>
    <dc:creator>VidyaSargur</dc:creator>
    <dc:date>2023-03-06T08:01:25Z</dc:date>
    <item>
      <title>How to correct executorLostFailure  Reason: Container marked as failed?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/How-to-correct-executorLostFailure-Reason-Container-marked/m-p/365345#M239327</link>
      <description>&lt;P&gt;Hi team, I'm facing the below-mentioned issue.&amp;nbsp;&lt;BR /&gt;executorLostFailure (executor 6 exited unrel+details ExecutorLostFailure (executor 6 exited unrelated to the running tasks) Reason: Container marked as failed: container_1677979357691_0001 on host: xyz Exit status: -100. Diagnostics: Container released on a *lost* node.&lt;BR /&gt;&lt;BR /&gt;Previously I was using 18 r5.4xlarge machines, and now I want to move to 5 r5.16xlarge machines, so I made a few config changes.&lt;BR /&gt;I'm using m5.2xlarge as the master in both cases.&lt;BR /&gt;I didn't change the EBS volume size. It was 950G previously and still is.&amp;nbsp;&lt;BR /&gt;spark.executor.cores: "5"&lt;BR /&gt;spark.driver.cores: "5"&lt;BR /&gt;spark.executor.memory: "35G"&lt;BR /&gt;spark.driver.memory: "30G"&lt;BR /&gt;spark.executor.instances: "60"&lt;BR /&gt;spark.dynamicAllocation.enabled: "false"&lt;BR /&gt;spark.executor.memoryOverhead: "6G"&lt;BR /&gt;spark.excludeOnFailure.enabled: "true"&lt;BR /&gt;spark.excludeOnFailure.enabled: "true"&lt;BR /&gt;spark.excludeOnFailure.killExcludedExecutors: "true"&lt;BR /&gt;spark.excludeOnFailure.application.fetchFailure.enabled: "true"&lt;BR /&gt;spark.excludeOnFailure.application.maxFailedTasksPerExecutor: "4"&lt;BR /&gt;spark.excludeOnFailure.application.maxFailedExecutorsPerNode: "5"&lt;BR /&gt;spark.excludeOnFailure.application.maxFailedExecutors: "6"&lt;BR /&gt;spark.sql.shuffle.partitions: "2000"&lt;BR /&gt;spark.executor.heartbeatInterval: "120s"&lt;BR /&gt;spark.network.timeout: "2400s"&lt;BR /&gt;&lt;BR /&gt;But a few jobs are failing with the above-mentioned error. Please help. I'm completely lost.&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 07 Mar 2023 19:43:08 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/How-to-correct-executorLostFailure-Reason-Container-marked/m-p/365345#M239327</guid>
      <dc:creator>zintan</dc:creator>
      <dc:date>2023-03-07T19:43:08Z</dc:date>
    </item>
    <item>
      <title>Re: executorLostFailure (executor 6 exited unrel+details ExecutorLostFailure (executor 6 exited unrelated to the running tasks) Reason: Container marked as failed: container_1677979357691_0001 on host: xyz Exit status: -100. Diagnostics: Container released on a *lost* node.</title>
      <link>https://community.cloudera.com/t5/Support-Questions/How-to-correct-executorLostFailure-Reason-Container-marked/m-p/365373#M239336</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/103828"&gt;@zintan&lt;/a&gt;,&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Welcome to our community! To help you get the best possible answer, I have tagged our Spark experts&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/78612"&gt;@RangaReddy&lt;/a&gt;&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/95503"&gt;@steven-matison&lt;/a&gt;&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/40384"&gt;@smdas&lt;/a&gt;&amp;nbsp; who may be able to assist you further.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Please feel free to provide any additional information or details about your query, and we hope that you will find a satisfactory solution to your question.&lt;/P&gt;</description>
      <pubDate>Mon, 06 Mar 2023 08:01:25 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/How-to-correct-executorLostFailure-Reason-Container-marked/m-p/365373#M239336</guid>
      <dc:creator>VidyaSargur</dc:creator>
      <dc:date>2023-03-06T08:01:25Z</dc:date>
    </item>
    <item>
      <title>Re: executorLostFailure (executor 6 exited unrel+details ExecutorLostFailure (executor 6 exited unrelated to the running tasks) Reason: Container marked as failed: container_1677979357691_0001 on host: xyz Exit status: -100. Diagnostics: Container released on a *lost* node.</title>
      <link>https://community.cloudera.com/t5/Support-Questions/How-to-correct-executorLostFailure-Reason-Container-marked/m-p/365424#M239347</link>
      <description>&lt;P&gt;Hey Vidya, thanks for the response. Please let me know if any further information is required to resolve the issue.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 06 Mar 2023 15:54:24 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/How-to-correct-executorLostFailure-Reason-Container-marked/m-p/365424#M239347</guid>
      <dc:creator>zintan</dc:creator>
      <dc:date>2023-03-06T15:54:24Z</dc:date>
    </item>
  </channel>
</rss>

