<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Pyspark issue AttributeError: 'DataFrame' object has no attribute 'saveAsTextFile' in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Pyspark-issue-AttributeError-DataFrame-object-has-no/m-p/78093#M81602</link>
    <description>&lt;P&gt;Hello community,&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;My first post here, so please let me know if I'm not following protocol.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have written a pyspark.sql query as shown below. I would like the query results to be sent to a textfile but I get the error:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;AttributeError&lt;/SPAN&gt;&lt;SPAN&gt;: 'DataFrame' object has no attribute 'saveAsTextFile'&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;Can someone take a look at the code and let me know where I'm going wrong:&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;#%%
import findspark
findspark.init('/home/packt/spark-2.1.0-bin-hadoop2.7')
from pyspark.sql import SparkSession

def main():

  spark = SparkSession.builder.appName('aggs').getOrCreate()
df = spark.read.csv('/home/packt/Downloads/Spark_DataFrames/sales_info.csv',inferSchema=True,header=True)
df.createOrReplaceTempView('sales_info')

example8 = spark.sql("""SELECT
    *
FROM sales_info
ORDER BY Sales DESC""")
example8.saveAsTextFile("juyfd")

main()&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;Any help would be appreciated&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;carlton&lt;/P&gt;</description>
    <pubDate>Fri, 16 Sep 2022 13:33:34 GMT</pubDate>
    <dc:creator>barlow</dc:creator>
    <dc:date>2022-09-16T13:33:34Z</dc:date>
    <item>
      <title>Pyspark issue AttributeError: 'DataFrame' object has no attribute 'saveAsTextFile'</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Pyspark-issue-AttributeError-DataFrame-object-has-no/m-p/78093#M81602</link>
      <description>&lt;P&gt;Hello community,&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;My first post here, so please let me know if I'm not following protocol.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have written a pyspark.sql query as shown below. I would like the query results to be sent to a textfile but I get the error:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;AttributeError&lt;/SPAN&gt;&lt;SPAN&gt;: 'DataFrame' object has no attribute 'saveAsTextFile'&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;Can someone take a look at the code and let me know where I'm going wrong:&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;#%%
import findspark
findspark.init('/home/packt/spark-2.1.0-bin-hadoop2.7')
from pyspark.sql import SparkSession

def main():

  spark = SparkSession.builder.appName('aggs').getOrCreate()
df = spark.read.csv('/home/packt/Downloads/Spark_DataFrames/sales_info.csv',inferSchema=True,header=True)
df.createOrReplaceTempView('sales_info')

example8 = spark.sql("""SELECT
    *
FROM sales_info
ORDER BY Sales DESC""")
example8.saveAsTextFile("juyfd")

main()&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;Any help would be appreciated&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;carlton&lt;/P&gt;</description>
      <pubDate>Fri, 16 Sep 2022 13:33:34 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Pyspark-issue-AttributeError-DataFrame-object-has-no/m-p/78093#M81602</guid>
      <dc:creator>barlow</dc:creator>
      <dc:date>2022-09-16T13:33:34Z</dc:date>
    </item>
    <item>
      <title>Re: Pyspark issue AttributeError: 'DataFrame' object has no attribute 'saveAsTextFile'</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Pyspark-issue-AttributeError-DataFrame-object-has-no/m-p/78102#M81603</link>
      <description>&lt;P&gt;ok, as I'm not getting much assistance with my original question I thought I would try and figure out the problem myself. So I rewrote the pyspark.sql as follows:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;PRE&gt;#%%
import findspark
findspark.init('/home/packt/spark-2.1.0-bin-hadoop2.7')
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName('ops').getOrCreate()
df = spark.read.csv('/home/packt/Downloads/Spark_DataFrames/Person_Person.csv',inferSchema=True,header=True)
df.createOrReplaceTempView('Person_Person')
myresults = spark.sql("""SELECT
  PersonType
 ,COUNT(PersonType) AS `Person Count`
FROM Person_Person
GROUP BY PersonType""")
myresults.collect()
result = myresults.collect()
result
result.saveAsTextFile("test")&lt;/PRE&gt;&lt;DIV&gt;&lt;SPAN&gt;However, I'm now getting the following error message:&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &lt;SPAN&gt;AttributeError&lt;/SPAN&gt;&lt;SPAN&gt;: 'list' object has no attribute 'saveAsTextFile'&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;I think this could be an easier situation to help resolve.&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;So, if someone could help resolve this issue that would be most appreciated&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;Thanks&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Mon, 06 Aug 2018 00:15:38 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Pyspark-issue-AttributeError-DataFrame-object-has-no/m-p/78102#M81603</guid>
      <dc:creator>barlow</dc:creator>
      <dc:date>2018-08-06T00:15:38Z</dc:date>
    </item>
    <item>
      <title>Re: Pyspark issue AttributeError: 'DataFrame' object has no attribute 'saveAsTextFile'</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Pyspark-issue-AttributeError-DataFrame-object-has-no/m-p/78476#M81604</link>
      <description>&lt;P&gt;As the error message states, the object, either a DataFrame or a list, does not have the saveAsTextFile() method.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;result.write.save() or&amp;nbsp;result.toJavaRDD.saveAsTextFile() should do the work, or you can refer to the DataFrame or RDD API:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;A href="https://spark.apache.org/docs/2.1.0/api/scala/index.html#org.apache.spark.sql.DataFrameWriter" target="_blank"&gt;https://spark.apache.org/docs/2.1.0/api/scala/index.html#org.apache.spark.sql.DataFrameWriter&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;A href="https://spark.apache.org/docs/2.1.0/api/scala/index.html#org.apache.spark.rdd.RDD" target="_blank"&gt;https://spark.apache.org/docs/2.1.0/api/scala/index.html#org.apache.spark.rdd.RDD&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 14 Aug 2018 08:47:52 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Pyspark-issue-AttributeError-DataFrame-object-has-no/m-p/78476#M81604</guid>
      <dc:creator>Yuexin Zhang</dc:creator>
      <dc:date>2018-08-14T08:47:52Z</dc:date>
    </item>
    <item>
      <title>Re: Pyspark issue AttributeError: 'DataFrame' object has no attribute 'saveAsTextFile'</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Pyspark-issue-AttributeError-DataFrame-object-has-no/m-p/381546#M81605</link>
      <description>&lt;P&gt;&lt;SPAN&gt;To save a DataFrame as a text file in PySpark, you need to convert it to an RDD first, or use DataFrame writer functions. &lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;Using DataFrame writer:&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;df.write.format("text").save("path_to_output_directory")&lt;BR /&gt;&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;Converting to RDD and then using saveAsTextFile&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;&lt;SPAN&gt;rdd = df.rdd.map(lambda row: str(row))&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;rdd.saveAsTextFile("path_to_output_directory")&lt;/SPAN&gt;&lt;BR /&gt;&lt;/STRONG&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 02 Jan 2024 11:09:55 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Pyspark-issue-AttributeError-DataFrame-object-has-no/m-p/381546#M81605</guid>
      <dc:creator>krunal_lathiya</dc:creator>
      <dc:date>2024-01-02T11:09:55Z</dc:date>
    </item>
  </channel>
</rss>

