<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Spark - Cannot mkdir file in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Spark-Cannot-mkdir-file/m-p/91117#M21678</link>
    <description>&lt;P&gt;My simple ETL code:&lt;/P&gt;&lt;PRE&gt;&lt;SPAN&gt;def &lt;/SPAN&gt;xmlConvert(spark):&lt;BR /&gt;    etl_time = time.time()&lt;BR /&gt;    df = spark.read.format(&lt;SPAN&gt;'com.databricks.spark.xml'&lt;/SPAN&gt;).options(&lt;SPAN&gt;rowTag&lt;/SPAN&gt;=&lt;SPAN&gt;'HistoricalTextData'&lt;/SPAN&gt;).load(&lt;BR /&gt;        &lt;SPAN&gt;'file:///home/zangetsu/proj/prometheus-core/demo/demo-1-iot-predictive-maintainance/dataset/data_train'&lt;/SPAN&gt;)&lt;BR /&gt;    df = df.withColumn(&lt;SPAN&gt;"TimeStamp"&lt;/SPAN&gt;, df[&lt;SPAN&gt;"TimeStamp"&lt;/SPAN&gt;].cast(&lt;SPAN&gt;"timestamp"&lt;/SPAN&gt;)).groupBy(&lt;SPAN&gt;"TimeStamp"&lt;/SPAN&gt;).pivot(&lt;SPAN&gt;"TagName"&lt;/SPAN&gt;).sum(&lt;BR /&gt;        &lt;SPAN&gt;"TagValue"&lt;/SPAN&gt;).na.fill(&lt;SPAN&gt;0&lt;/SPAN&gt;)&lt;BR /&gt;    df.repartition(&lt;SPAN&gt;1&lt;/SPAN&gt;).write.csv(&lt;BR /&gt;        &lt;SPAN&gt;path&lt;/SPAN&gt;=&lt;SPAN&gt;"file:///proj/prometheus-core/demo/demo-1-iot-predictive-maintainance/dataset/"&lt;/SPAN&gt;,&lt;BR /&gt;        &lt;SPAN&gt;mode&lt;/SPAN&gt;=&lt;SPAN&gt;"overwrite"&lt;/SPAN&gt;,&lt;BR /&gt;        &lt;SPAN&gt;header&lt;/SPAN&gt;=&lt;SPAN&gt;True&lt;/SPAN&gt;,&lt;BR /&gt;        &lt;SPAN&gt;sep&lt;/SPAN&gt;=&lt;SPAN&gt;","&lt;/SPAN&gt;)&lt;BR /&gt;    &lt;SPAN&gt;print&lt;/SPAN&gt;(&lt;SPAN&gt;"Time taken to do xml transformation: --- %s seconds ---" &lt;/SPAN&gt;% (time.time() - etl_time))&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;if &lt;/SPAN&gt;__name__ == &lt;SPAN&gt;'__main__'&lt;/SPAN&gt;:&lt;BR /&gt;    spark = SparkSession \&lt;BR /&gt;        .builder \&lt;BR /&gt;        .appName(&lt;SPAN&gt;'XML ETL'&lt;/SPAN&gt;) \&lt;BR /&gt;        .master(&lt;SPAN&gt;"local[*]"&lt;/SPAN&gt;) \&lt;BR /&gt;        .config(&lt;SPAN&gt;'spark.jars.packages'&lt;/SPAN&gt;, &lt;SPAN&gt;'com.databricks:spark-xml_2.11:0.5.0'&lt;/SPAN&gt;) \&lt;BR /&gt;        .getOrCreate()&lt;BR /&gt;&lt;BR /&gt;    
&lt;SPAN&gt;print&lt;/SPAN&gt;(&lt;SPAN&gt;'Session created'&lt;/SPAN&gt;)&lt;BR /&gt;&lt;BR /&gt;    &lt;SPAN&gt;try&lt;/SPAN&gt;:&lt;BR /&gt;        xmlConvert(spark)&lt;BR /&gt;&lt;BR /&gt;    &lt;SPAN&gt;finally&lt;/SPAN&gt;:&lt;BR /&gt;        spark.stop()&lt;/PRE&gt;&lt;P&gt;Still throwing the issue reported.&lt;/P&gt;</description>
    <pubDate>Fri, 31 May 2019 16:24:55 GMT</pubDate>
    <dc:creator>ArchenROOT</dc:creator>
    <dc:date>2019-05-31T16:24:55Z</dc:date>
    <item>
      <title>Spark - Cannot mkdir file</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Spark-Cannot-mkdir-file/m-p/67896#M21675</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have an issue with Spark, the job failed with this error message :&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;EM&gt;scala&amp;gt; someDF.write.mode(SaveMode.Append).parquet("file:///data/bbox/tmp")&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;[Stage 0:&amp;gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; (0 + 2) / 2]18/06/05 12:37:39 WARN scheduler.TaskSetManager: Lost task 0.0 in stage 0.0 (TID 0, dec-bb-dl03.bbox-dec.lab.oxv.fr, executor 1): java.io.IOException: Mkdirs failed to create file:/data/bbox/tmp/_temporary/0/_temporary/attempt_201806051237_0000_m_000000_0 (exists=false, cwd=file:/yarn/nm/usercache/hdfs/appcache/application_1527756804026_0065/container_e33_1527756804026_0065_01_000002)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:447)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:433)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:926)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at 
org.apache.hadoop.fs.FileSystem.create(FileSystem.java:907)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:804)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at parquet.hadoop.ParquetFileWriter.&amp;lt;init&amp;gt;(ParquetFileWriter.java:225)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:311)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:282)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.&amp;lt;init&amp;gt;(ParquetRelation.scala:94)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.spark.sql.execution.datasources.parquet.ParquetRelation$$anon$3.newInstance(ParquetRelation.scala:286)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.spark.sql.execution.datasources.BaseWriterContainer.newOutputWriter(WriterContainer.scala:129)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.spark.sql.execution.datasources.DefaultWriterContainer.writeRows(WriterContainer.scala:255)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation$$anonfun$run$1$$anonfun$apply$mcV$sp$3.apply(InsertIntoHadoopFsRelation.scala:148)&lt;/EM&gt;&lt;BR 
/&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation$$anonfun$run$1$$anonfun$apply$mcV$sp$3.apply(InsertIntoHadoopFsRelation.scala:148)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.spark.scheduler.Task.run(Task.scala:89)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:242)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.lang.Thread.run(Thread.java:748)&lt;/EM&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;EM&gt;18/06/05 12:37:39 WARN scheduler.TaskSetManager: Lost task 0.1 in stage 0.0 (TID 2, dec-bb-dl03.bbox-dec.lab.oxv.fr, executor 1): java.io.IOException: Mkdirs failed to create file:/data/bbox/tmp/_temporary/0/_temporary/attempt_201806051237_0000_m_000000_1 (exists=false, cwd=file:/yarn/nm/usercache/hdfs/appcache/application_1527756804026_0065/container_e33_1527756804026_0065_01_000002)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:447)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at 
org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:433)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:926)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:907)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:804)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at parquet.hadoop.ParquetFileWriter.&amp;lt;init&amp;gt;(ParquetFileWriter.java:225)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:311)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:282)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.&amp;lt;init&amp;gt;(ParquetRelation.scala:94)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.spark.sql.execution.datasources.parquet.ParquetRelation$$anon$3.newInstance(ParquetRelation.scala:286)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.spark.sql.execution.datasources.BaseWriterContainer.newOutputWriter(WriterContainer.scala:129)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at 
org.apache.spark.sql.execution.datasources.DefaultWriterContainer.writeRows(WriterContainer.scala:255)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation$$anonfun$run$1$$anonfun$apply$mcV$sp$3.apply(InsertIntoHadoopFsRelation.scala:148)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation$$anonfun$run$1$$anonfun$apply$mcV$sp$3.apply(InsertIntoHadoopFsRelation.scala:148)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.spark.scheduler.Task.run(Task.scala:89)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:242)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)&lt;/EM&gt;&lt;BR /&gt;&lt;EM&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.lang.Thread.run(Thread.java:748)&lt;/EM&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;We use CDH 5.14 with the Spark included into the CDH (1.6.0), we think about an version incompatibility issue.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;First I tried to change directory rights (777 or give write right to hadoop group), but it didn't 
work.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Any idea ?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Julien.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 16 Sep 2022 13:18:06 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Spark-Cannot-mkdir-file/m-p/67896#M21675</guid>
      <dc:creator>JSenzier</dc:creator>
      <dc:date>2022-09-16T13:18:06Z</dc:date>
    </item>
    <item>
      <title>Re: Spark - Cannot mkdir file</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Spark-Cannot-mkdir-file/m-p/67972#M21676</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/26681"&gt;@JSenzier&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Right, this won't work in &lt;EM&gt;client&lt;/EM&gt; mode. It's not about the compatibility of Spark1.6 with CDH version, but the way deploy mode 'client' works. &lt;SPAN&gt;spark-shell on Cloudera installs runs in yarn-client mode by default. Given the use of file:/// (which is generally used for local disks) we recommend running the app in local mode for such local testing or you can&amp;nbsp;turn your script (using maven or sbt) into a jar file and&amp;nbsp;execute this using spark-submit in cluster mode.&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;$ spark-shell --master local[*]&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 07 Jun 2018 14:37:33 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Spark-Cannot-mkdir-file/m-p/67972#M21676</guid>
      <dc:creator>AutoIN</dc:creator>
      <dc:date>2018-06-07T14:37:33Z</dc:date>
    </item>
    <item>
      <title>Re: Spark - Cannot mkdir file</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Spark-Cannot-mkdir-file/m-p/67973#M21677</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you for your help, it's working, it's not very easy to understand when we encountered this issue (I didn't understand why yarn tried to create files into _temporary directory first), but with this explanation we can now understand this behaviour, so thank you &lt;span class="lia-unicode-emoji" title=":winking_face:"&gt;😉&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 07 Jun 2018 14:49:48 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Spark-Cannot-mkdir-file/m-p/67973#M21677</guid>
      <dc:creator>JSenzier</dc:creator>
      <dc:date>2018-06-07T14:49:48Z</dc:date>
    </item>
    <item>
      <title>Re: Spark - Cannot mkdir file</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Spark-Cannot-mkdir-file/m-p/91117#M21678</link>
      <description>&lt;P&gt;My simple ETL code:&lt;/P&gt;&lt;PRE&gt;&lt;SPAN&gt;def &lt;/SPAN&gt;xmlConvert(spark):&lt;BR /&gt;    etl_time = time.time()&lt;BR /&gt;    df = spark.read.format(&lt;SPAN&gt;'com.databricks.spark.xml'&lt;/SPAN&gt;).options(&lt;SPAN&gt;rowTag&lt;/SPAN&gt;=&lt;SPAN&gt;'HistoricalTextData'&lt;/SPAN&gt;).load(&lt;BR /&gt;        &lt;SPAN&gt;'file:///home/zangetsu/proj/prometheus-core/demo/demo-1-iot-predictive-maintainance/dataset/data_train'&lt;/SPAN&gt;)&lt;BR /&gt;    df = df.withColumn(&lt;SPAN&gt;"TimeStamp"&lt;/SPAN&gt;, df[&lt;SPAN&gt;"TimeStamp"&lt;/SPAN&gt;].cast(&lt;SPAN&gt;"timestamp"&lt;/SPAN&gt;)).groupBy(&lt;SPAN&gt;"TimeStamp"&lt;/SPAN&gt;).pivot(&lt;SPAN&gt;"TagName"&lt;/SPAN&gt;).sum(&lt;BR /&gt;        &lt;SPAN&gt;"TagValue"&lt;/SPAN&gt;).na.fill(&lt;SPAN&gt;0&lt;/SPAN&gt;)&lt;BR /&gt;    df.repartition(&lt;SPAN&gt;1&lt;/SPAN&gt;).write.csv(&lt;BR /&gt;        &lt;SPAN&gt;path&lt;/SPAN&gt;=&lt;SPAN&gt;"file:///proj/prometheus-core/demo/demo-1-iot-predictive-maintainance/dataset/"&lt;/SPAN&gt;,&lt;BR /&gt;        &lt;SPAN&gt;mode&lt;/SPAN&gt;=&lt;SPAN&gt;"overwrite"&lt;/SPAN&gt;,&lt;BR /&gt;        &lt;SPAN&gt;header&lt;/SPAN&gt;=&lt;SPAN&gt;True&lt;/SPAN&gt;,&lt;BR /&gt;        &lt;SPAN&gt;sep&lt;/SPAN&gt;=&lt;SPAN&gt;","&lt;/SPAN&gt;)&lt;BR /&gt;    &lt;SPAN&gt;print&lt;/SPAN&gt;(&lt;SPAN&gt;"Time taken to do xml transformation: --- %s seconds ---" &lt;/SPAN&gt;% (time.time() - etl_time))&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;if &lt;/SPAN&gt;__name__ == &lt;SPAN&gt;'__main__'&lt;/SPAN&gt;:&lt;BR /&gt;    spark = SparkSession \&lt;BR /&gt;        .builder \&lt;BR /&gt;        .appName(&lt;SPAN&gt;'XML ETL'&lt;/SPAN&gt;) \&lt;BR /&gt;        .master(&lt;SPAN&gt;"local[*]"&lt;/SPAN&gt;) \&lt;BR /&gt;        .config(&lt;SPAN&gt;'spark.jars.packages'&lt;/SPAN&gt;, &lt;SPAN&gt;'com.databricks:spark-xml_2.11:0.5.0'&lt;/SPAN&gt;) \&lt;BR /&gt;        .getOrCreate()&lt;BR /&gt;&lt;BR /&gt;    
&lt;SPAN&gt;print&lt;/SPAN&gt;(&lt;SPAN&gt;'Session created'&lt;/SPAN&gt;)&lt;BR /&gt;&lt;BR /&gt;    &lt;SPAN&gt;try&lt;/SPAN&gt;:&lt;BR /&gt;        xmlConvert(spark)&lt;BR /&gt;&lt;BR /&gt;    &lt;SPAN&gt;finally&lt;/SPAN&gt;:&lt;BR /&gt;        spark.stop()&lt;/PRE&gt;&lt;P&gt;Still throwing the issue reported.&lt;/P&gt;</description>
      <pubDate>Fri, 31 May 2019 16:24:55 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Spark-Cannot-mkdir-file/m-p/91117#M21678</guid>
      <dc:creator>ArchenROOT</dc:creator>
      <dc:date>2019-05-31T16:24:55Z</dc:date>
    </item>
    <item>
      <title>Re: Spark - Cannot mkdir file</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Spark-Cannot-mkdir-file/m-p/91118#M21679</link>
      <description>&lt;P&gt;And I found a solution by pointing job.local.dir to the directory with the code:&lt;/P&gt;&lt;PRE&gt;spark = SparkSession \&lt;BR /&gt;    .builder \&lt;BR /&gt;    .appName(&lt;SPAN&gt;'XML ETL'&lt;/SPAN&gt;) \&lt;BR /&gt;    .master(&lt;SPAN&gt;"local[*]"&lt;/SPAN&gt;) \&lt;BR /&gt;    .config(&lt;SPAN&gt;'job.local.dir'&lt;/SPAN&gt;, &lt;SPAN&gt;'file:/home/zangetsu/proj/prometheus-core/demo/demo-1-iot-predictive-maintainance'&lt;/SPAN&gt;) \&lt;BR /&gt;    .config(&lt;SPAN&gt;'spark.jars.packages'&lt;/SPAN&gt;, &lt;SPAN&gt;'com.databricks:spark-xml_2.11:0.5.0'&lt;/SPAN&gt;) \&lt;BR /&gt;    .getOrCreate()&lt;/PRE&gt;&lt;P&gt;Now all works&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 31 May 2019 16:29:57 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Spark-Cannot-mkdir-file/m-p/91118#M21679</guid>
      <dc:creator>ArchenROOT</dc:creator>
      <dc:date>2019-05-31T16:29:57Z</dc:date>
    </item>
  </channel>
</rss>

