<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question How to change Spark _temporary directory when writing data? in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/How-to-change-Spark-temporary-directory-when-writing-data/m-p/237389#M199202</link>
    <description>&lt;P&gt;I have two Spark applications writing data to the same directory on HDFS, which causes the app that finishes first to delete the working directory _temporary, which still contains temp files belonging to the other app.&lt;/P&gt;&lt;P&gt;So can I specify a separate _temporary directory for each Spark application?&lt;/P&gt;</description>
    <pubDate>Thu, 06 Dec 2018 10:16:52 GMT</pubDate>
    <dc:creator>darouwan</dc:creator>
    <dc:date>2018-12-06T10:16:52Z</dc:date>
    <item>
      <title>How to change Spark _temporary directory when writing data?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/How-to-change-Spark-temporary-directory-when-writing-data/m-p/237389#M199202</link>
      <description>&lt;P&gt;I have two Spark applications writing data to the same directory on HDFS, which causes the app that finishes first to delete the working directory _temporary, which still contains temp files belonging to the other app.&lt;/P&gt;&lt;P&gt;So can I specify a separate _temporary directory for each Spark application?&lt;/P&gt;</description>
      <pubDate>Thu, 06 Dec 2018 10:16:52 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/How-to-change-Spark-temporary-directory-when-writing-data/m-p/237389#M199202</guid>
      <dc:creator>darouwan</dc:creator>
      <dc:date>2018-12-06T10:16:52Z</dc:date>
    </item>
    <item>
      <title>Re: How to change Spark _temporary directory when writing data?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/How-to-change-Spark-temporary-directory-when-writing-data/m-p/237390#M199203</link>
      <description>&lt;P&gt;@&lt;A href="https://community.hortonworks.com/users/65196/k-2feng.html"&gt;Junfeng Chen&lt;/A&gt;&lt;/P&gt;&lt;P&gt;You can change the path to the temp folder for each Spark application with the spark.local.dir property, as shown below:&lt;/P&gt;&lt;PRE&gt;SparkConf conf = new SparkConf().setMaster("local").setAppName("test").set("spark.local.dir", "/tmp/spark-temp");&lt;/PRE&gt;&lt;P&gt;&lt;A href="https://spark.apache.org/docs/2.2.0/configuration.html#application-properties" target="_blank"&gt;Reference&lt;/A&gt;&lt;BR /&gt;Please accept the answer you found most useful.&lt;/P&gt;</description>
      <pubDate>Thu, 06 Dec 2018 12:27:04 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/How-to-change-Spark-temporary-directory-when-writing-data/m-p/237390#M199203</guid>
      <dc:creator>jagadeesan</dc:creator>
      <dc:date>2018-12-06T12:27:04Z</dc:date>
    </item>
    <item>
      <title>Re: How to change Spark _temporary directory when writing data?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/How-to-change-Spark-temporary-directory-when-writing-data/m-p/237391#M199204</link>
      <description>&lt;P style="margin-left: 20px;"&gt;Thanks &lt;A rel="user" href="https://community.cloudera.com/users/97811/jagadeesanas.html" nodeid="97811"&gt;@Jagadeesan A S&lt;/A&gt;&lt;/P&gt;&lt;P&gt;_temporary is a temp directory under the path passed to df.write.parquet(path) on HDFS. However, the default value of spark.local.dir is /tmp, and the documentation describes it as:&lt;/P&gt;&lt;PRE&gt;Directory to use for "scratch" space in Spark, including map output files and RDDs that get stored on disk. This should be on a fast, local disk in your system.&lt;/PRE&gt;&lt;P&gt;So it should be a directory on the local file system. I am not sure spark.local.dir refers to the temp directory Spark uses when writing output ...&lt;/P&gt;</description>
      <pubDate>Thu, 06 Dec 2018 12:32:50 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/How-to-change-Spark-temporary-directory-when-writing-data/m-p/237391#M199204</guid>
      <dc:creator>darouwan</dc:creator>
      <dc:date>2018-12-06T12:32:50Z</dc:date>
    </item>
    <item>
      <title>Re: How to change Spark _temporary directory when writing data?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/How-to-change-Spark-temporary-directory-when-writing-data/m-p/237392#M199205</link>
      <description>&lt;P&gt;@&lt;A href="https://community.hortonworks.com/users/65196/k-2feng.html"&gt;Junfeng Chen&lt;/A&gt;&lt;/P&gt;&lt;P&gt;That's true, the above property is for the local filesystem. For HDFS, could you try to use &lt;STRONG&gt;&lt;EM&gt;Append &lt;/EM&gt;&lt;/STRONG&gt;instead of &lt;STRONG&gt;Overwrite&lt;/STRONG&gt;? The problem with this approach is that we need to delete files manually from the temp directory.&lt;/P&gt;</description>
      <pubDate>Thu, 06 Dec 2018 14:03:52 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/How-to-change-Spark-temporary-directory-when-writing-data/m-p/237392#M199205</guid>
      <dc:creator>jagadeesan</dc:creator>
      <dc:date>2018-12-06T14:03:52Z</dc:date>
    </item>
    <item>
      <title>Re: How to change Spark _temporary directory when writing data?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/How-to-change-Spark-temporary-directory-when-writing-data/m-p/237393#M199206</link>
      <description>&lt;P&gt;Hi &lt;A rel="user" href="https://community.cloudera.com/users/97811/jagadeesanas.html" nodeid="97811"&gt;@Jagadeesan A S&lt;/A&gt;&lt;/P&gt;&lt;P&gt;My current save mode is append. My Spark Streaming apps run every 5 minutes, so it is not convenient to delete files manually. I think the better solution is to customize the temp location.&lt;/P&gt;&lt;P&gt;Or can I set an offset for the scheduled running time? For example, my 2 apps currently both run every 5 minutes, that is, at 0, 5, 10, 15, 20.&lt;/P&gt;&lt;P&gt;Can I set a schedule so that one still runs at 0, 5, 10, 15, and the other runs at 2.5, 7.5, 12.5?&lt;/P&gt;</description>
      <pubDate>Fri, 07 Dec 2018 09:44:10 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/How-to-change-Spark-temporary-directory-when-writing-data/m-p/237393#M199206</guid>
      <dc:creator>darouwan</dc:creator>
      <dc:date>2018-12-07T09:44:10Z</dc:date>
    </item>
    <item>
      <title>Re: How to change Spark _temporary directory when writing data?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/How-to-change-Spark-temporary-directory-when-writing-data/m-p/278839#M208274</link>
      <description>&lt;P&gt;Did you ever figure out the solution? I am facing the same issue.&lt;/P&gt;</description>
      <pubDate>Fri, 04 Oct 2019 14:51:41 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/How-to-change-Spark-temporary-directory-when-writing-data/m-p/278839#M208274</guid>
      <dc:creator>Siddu198</dc:creator>
      <dc:date>2019-10-04T14:51:41Z</dc:date>
    </item>
    <item>
      <title>Re: How to change Spark _temporary directory when writing data?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/How-to-change-Spark-temporary-directory-when-writing-data/m-p/356605#M237346</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/70028"&gt;@Siddu198&lt;/a&gt;&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;Add this config to your job:&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;set("mapreduce.fileoutputcommitter.algorithm.version","2")&lt;/LI-CODE&gt;&lt;P&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 01 Nov 2022 07:01:49 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/How-to-change-Spark-temporary-directory-when-writing-data/m-p/356605#M237346</guid>
      <dc:creator>nur.majid</dc:creator>
      <dc:date>2022-11-01T07:01:49Z</dc:date>
    </item>
  </channel>
</rss>

