<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: How to save a dataframe as ORC file? in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/How-to-save-a-dataframe-as-ORC-file/m-p/144703#M107282</link>
    <description>&lt;P&gt;Create some properties in your pom.xml:&lt;/P&gt;&lt;PRE&gt;&amp;lt;properties&amp;gt;
  &amp;lt;project.build.sourceEncoding&amp;gt;UTF-8&amp;lt;/project.build.sourceEncoding&amp;gt;
  &amp;lt;scala.core&amp;gt;2.10&amp;lt;/scala.core&amp;gt;
  &amp;lt;spark.version&amp;gt;1.6.1&amp;lt;/spark.version&amp;gt;
&amp;lt;/properties&amp;gt;
&lt;/PRE&gt;&lt;P&gt;Include spark-hive in your project's dependencies:&lt;/P&gt;&lt;PRE&gt;&amp;lt;dependency&amp;gt;
   &amp;lt;groupId&amp;gt;org.apache.spark&amp;lt;/groupId&amp;gt;
   &amp;lt;artifactId&amp;gt;spark-hive_${scala.core}&amp;lt;/artifactId&amp;gt;
   &amp;lt;version&amp;gt;${spark.version}&amp;lt;/version&amp;gt;
&amp;lt;/dependency&amp;gt;
&lt;/PRE&gt;&lt;P&gt;Then in your code:&lt;/P&gt;&lt;PRE&gt;// create a new hive context from the spark context
val hiveContext = new org.apache.spark.sql.hive.HiveContext(sparkContext)
// create the data frame and write it to ORC
// (SaveMode used below requires: import org.apache.spark.sql.SaveMode)
// output will be a directory of ORC files
val df = hiveContext.createDataFrame(rdd)
df.write.mode(SaveMode.Overwrite).format("orc")
  .save("/tmp/myapp.orc/")
&lt;/PRE&gt;</description>
    <pubDate>Sat, 10 Dec 2016 06:18:11 GMT</pubDate>
    <dc:creator>christopher_w_m</dc:creator>
    <dc:date>2016-12-10T06:18:11Z</dc:date>
  </channel>
</rss>

