<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Spark on S3 in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Spark-on-S3/m-p/103438#M29844</link>
    <description>&lt;P&gt;Take a look at &lt;A href="https://community.hortonworks.com/articles/25523/hdp-240-and-spark-160-connecting-to-aws-s3-buckets.html" target="_blank"&gt;https://community.hortonworks.com/articles/25523/hdp-240-and-spark-160-connecting-to-aws-s3-buckets.html&lt;/A&gt; which gives details on how to access S3 from spark. &lt;/P&gt;</description>
    <pubDate>Thu, 26 May 2016 21:01:42 GMT</pubDate>
    <dc:creator>ravi1</dc:creator>
    <dc:date>2016-05-26T21:01:42Z</dc:date>
    <item>
      <title>Spark on S3</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Spark-on-S3/m-p/103435#M29841</link>
      <description>&lt;P&gt;Unable to execute the queries on S3 data using SPARK and PYSPARK. It is throwing the error below.&lt;/P&gt;&lt;P&gt;: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.hadoop.fs.s3a.S3AFileSystem not found&lt;/P&gt;&lt;P&gt;  at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2195)&lt;/P&gt;&lt;P&gt;  at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2638)&lt;/P&gt;&lt;P&gt;  at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2651)&lt;/P&gt;&lt;P&gt;….&lt;/P&gt;&lt;P&gt;….&lt;/P&gt;&lt;P&gt;Caused by: java.lang.ClassNotFoundException: Class org.apache.hadoop.fs.s3a.S3AFileSystem not found&lt;/P&gt;&lt;P&gt;  at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2101)&lt;/P&gt;&lt;P&gt;  at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2193)&lt;/P&gt;&lt;P&gt;We have tried it by adding the parameters below but no luck.&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;Parameter name&lt;/STRONG&gt;: fs.s3a.impl&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;Parameter value:&lt;/STRONG&gt; org.apache.hadoop.fs.s3a.S3AFileSystem&lt;/P&gt;&lt;P&gt;Added this parameter in hdfs-site.xml, core-site.xml, hive-site.xml and also added the aws jar files in mapred-site.xml (added to classpath) files.&lt;/P&gt;</description>
      <pubDate>Thu, 26 May 2016 20:02:05 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Spark-on-S3/m-p/103435#M29841</guid>
      <dc:creator>khaslbeck</dc:creator>
      <dc:date>2016-05-26T20:02:05Z</dc:date>
    </item>
    <item>
      <title>Re: Spark on S3</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Spark-on-S3/m-p/103436#M29842</link>
      <description>&lt;P&gt;Sorry, this was the article I meant to point you to:&lt;/P&gt;&lt;P&gt;&lt;A href="https://community.hortonworks.com/articles/25578/how-to-access-data-files-stored-in-aws-s3-buckets.html" target="_blank"&gt;https://community.hortonworks.com/articles/25578/how-to-access-data-files-stored-in-aws-s3-buckets.html&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 26 May 2016 20:26:06 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Spark-on-S3/m-p/103436#M29842</guid>
      <dc:creator>tmccuch</dc:creator>
      <dc:date>2016-05-26T20:26:06Z</dc:date>
    </item>
    <item>
      <title>Re: Spark on S3</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Spark-on-S3/m-p/103437#M29843</link>
      <description>&lt;P&gt;Hi &lt;A href="https://community.hortonworks.com/questions/35801/spark-on-s3.html#"&gt;@Kirk Haslbeck&lt;/A&gt;,&lt;/P&gt;&lt;P&gt;I don't know which version you are using, but if you haven't seen it yet, take a look at the Jira below; it might help.&lt;/P&gt;&lt;P&gt;&lt;A href="https://issues.apache.org/jira/browse/SPARK-7442" target="_blank"&gt;https://issues.apache.org/jira/browse/SPARK-7442&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 26 May 2016 20:38:30 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Spark-on-S3/m-p/103437#M29843</guid>
      <dc:creator>jyadav</dc:creator>
      <dc:date>2016-05-26T20:38:30Z</dc:date>
    </item>
    <item>
      <title>Re: Spark on S3</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Spark-on-S3/m-p/103438#M29844</link>
      <description>&lt;P&gt;Take a look at &lt;A href="https://community.hortonworks.com/articles/25523/hdp-240-and-spark-160-connecting-to-aws-s3-buckets.html" target="_blank"&gt;https://community.hortonworks.com/articles/25523/hdp-240-and-spark-160-connecting-to-aws-s3-buckets.html&lt;/A&gt; which gives details on how to access S3 from spark. &lt;/P&gt;</description>
      <pubDate>Thu, 26 May 2016 21:01:42 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Spark-on-S3/m-p/103438#M29844</guid>
      <dc:creator>ravi1</dc:creator>
      <dc:date>2016-05-26T21:01:42Z</dc:date>
    </item>
    <item>
      <title>Re: Spark on S3</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Spark-on-S3/m-p/103439#M29845</link>
      <description>&lt;P&gt;yep, S3A implementation is not complete yet, try using S3N for now or follow Alex's article referenced below.&lt;/P&gt;</description>
      <pubDate>Thu, 26 May 2016 22:41:00 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Spark-on-S3/m-p/103439#M29845</guid>
      <dc:creator>aervits</dc:creator>
      <dc:date>2016-05-26T22:41:00Z</dc:date>
    </item>
    <item>
      <title>Re: Spark on S3</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Spark-on-S3/m-p/103440#M29846</link>
      <description>&lt;P&gt;Thanks all &lt;A rel="user" href="https://community.cloudera.com/users/393/aervits.html" nodeid="393"&gt;@Artem Ervits&lt;/A&gt;  &lt;A rel="user" href="https://community.cloudera.com/users/454/tmccuch.html" nodeid="454"&gt;@Tom McCuch&lt;/A&gt; for the comments.  I did get it resolved by passing all the S3 jars properly on the classpath.  The articles included in your threads helped.  &lt;/P&gt;</description>
      <pubDate>Fri, 27 May 2016 02:41:49 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Spark-on-S3/m-p/103440#M29846</guid>
      <dc:creator>khaslbeck</dc:creator>
      <dc:date>2016-05-27T02:41:49Z</dc:date>
    </item>
    <item>
      <title>Re: Spark on S3</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Spark-on-S3/m-p/103441#M29847</link>
      <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/2977/khaslbeck.html" nodeid="2977"&gt;@Kirk Haslbeck&lt;/A&gt; - I was working on something similar. Writing PySpark to use SparkSQL to analyze data in S3 using the S3A filesystem client. I documented my work with instructions here: &lt;/P&gt;&lt;P&gt;&lt;A href="https://community.hortonworks.com/articles/36339/spark-s3a-filesystem-client-from-hdp-to-access-s3.html" target="_blank"&gt;https://community.hortonworks.com/articles/36339/spark-s3a-filesystem-client-from-hdp-to-access-s3.html&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 30 May 2016 23:57:05 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Spark-on-S3/m-p/103441#M29847</guid>
      <dc:creator>bmathew</dc:creator>
      <dc:date>2016-05-30T23:57:05Z</dc:date>
    </item>
  </channel>
</rss>

