<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Unable to Create a single file with PySpark query in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Unable-to-Create-a-single-file-with-PySpark-query/m-p/213876#M175800</link>
    <description>&lt;P&gt;Hello Community,&lt;/P&gt;&lt;P&gt;I am trying to create a single file from an output query that is overwritten each time the query is run. However, I keep on getting multiple part-00001 files. I have tried the following code snippets. They appear to overwrite the file, but a different filename is generated each time.&lt;/P&gt;&lt;PRE&gt;example1.coalesce(1).write.option("header","true").mode("overwrite").csv("adl://carlslake.azuredatalakestore.net/jfolder2/outputfiles/myoutput4/newresults") &lt;/PRE&gt;&lt;PRE&gt;example1.coalesce(1).write.option("header","true").mode("overwrite").csv("adl://carlslake.azuredatalakestore.net/jfolder2/outputfiles/myoutput4/newresults/theresults.csv")
carl = example1.show() &lt;/PRE&gt;&lt;PRE&gt;example1.coalesce(1).write.mode("append").json("adl://carlslake.azuredatalakestore.net/jfolder2/outputfiles/myoutput/myresults.json")&lt;/PRE&gt;&lt;PRE&gt;example1.repartition(1).write.format("csv").mode("overwrite").save("adl://carlslake.azuredatalakestore.net/jfolder2/outputfiles/myoutput/thefile.csv")&lt;/PRE&gt;&lt;P&gt;Can someone show me how to write code that will result in a single file that is overwritten without changing the filename?&lt;/P&gt;</description>
    <pubDate>Sat, 20 Oct 2018 21:46:53 GMT</pubDate>
    <dc:creator>barlow</dc:creator>
    <dc:date>2018-10-20T21:46:53Z</dc:date>
  </channel>
</rss>

