<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Can I use SparkSQL on a cluster using Hive on Spark? in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Can-I-use-SparkSQL-on-a-cluster-using-Hive-on-Spark/m-p/282189#M209820</link>
    <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/1095"&gt;@av&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Here are the links for the Hive and Spark interpreter docs:&lt;/P&gt;&lt;P&gt;&lt;A href="https://zeppelin.apache.org/docs/0.8.2/interpreter/hive.html" target="_blank"&gt;https://zeppelin.apache.org/docs/0.8.2/interpreter/hive.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;A href="https://zeppelin.apache.org/docs/0.8.2/interpreter/spark.html" target="_blank"&gt;https://zeppelin.apache.org/docs/0.8.2/interpreter/spark.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Best,&lt;/P&gt;&lt;P&gt;Helmi KHALIFA&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Wed, 06 Nov 2019 10:08:58 GMT</pubDate>
    <dc:creator>helmi_khalifa</dc:creator>
    <dc:date>2019-11-06T10:08:58Z</dc:date>
    <item>
      <title>Can I use SparkSQL on a cluster using Hive on Spark?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Can-I-use-SparkSQL-on-a-cluster-using-Hive-on-Spark/m-p/282174#M209808</link>
      <description>&lt;P&gt;I am using CDH 6.1.1 Cluster.&lt;/P&gt;&lt;P&gt;Cluster is configured to use Spark as the execution engine for Hive.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Is there anything wrong with using SparkSQL on this Cluster?&lt;/P&gt;&lt;P&gt;Is it ok to create Hive Tables and change data using SparkSQL?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Since SparkSQL uses the Hive Metastore, I suspect that there may be a conflict between SparkSQL and Hive on Spark.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;In addition, please refer to documentation on how to integrate Cloudera CDH Hive with Apache Zeppelin's Spark interpreter.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 06 Nov 2019 06:36:02 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Can-I-use-SparkSQL-on-a-cluster-using-Hive-on-Spark/m-p/282174#M209808</guid>
      <dc:creator>avengers</dc:creator>
      <dc:date>2019-11-06T06:36:02Z</dc:date>
    </item>
    <item>
      <title>Re: Can I use SparkSQL on a cluster using Hive on Spark?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Can-I-use-SparkSQL-on-a-cluster-using-Hive-on-Spark/m-p/282189#M209820</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/1095"&gt;@av&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Here are the links for the Hive and Spark interpreter docs:&lt;/P&gt;&lt;P&gt;&lt;A href="https://zeppelin.apache.org/docs/0.8.2/interpreter/hive.html" target="_blank"&gt;https://zeppelin.apache.org/docs/0.8.2/interpreter/hive.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;A href="https://zeppelin.apache.org/docs/0.8.2/interpreter/spark.html" target="_blank"&gt;https://zeppelin.apache.org/docs/0.8.2/interpreter/spark.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Best,&lt;/P&gt;&lt;P&gt;Helmi KHALIFA&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 06 Nov 2019 10:08:58 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Can-I-use-SparkSQL-on-a-cluster-using-Hive-on-Spark/m-p/282189#M209820</guid>
      <dc:creator>helmi_khalifa</dc:creator>
      <dc:date>2019-11-06T10:08:58Z</dc:date>
    </item>
    <item>
      <title>Re: Can I use SparkSQL on a cluster using Hive on Spark?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Can-I-use-SparkSQL-on-a-cluster-using-Hive-on-Spark/m-p/282309#M209903</link>
      <description>&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks. However, I have already read them.&lt;/P&gt;&lt;P&gt;I'm already connecting to Hive from Zeppelin using JDBC.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I want to query Hive Table with SparkSQL.&lt;/P&gt;&lt;P&gt;And I'm wondering if the metastore won't crash if I use it in a Cluster using HiveOnSpark.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;For example.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="java"&gt;%spark
val df = spark.read.format("csv").option("header", "true")
.option("inferSchema", "true").load("/somefile.csv")

df.createOrReplaceTempView("csvTable");

%spark.sql
select * 
from csvTable lt
join hiveTable rt
on lt.col = rt.col&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 07 Nov 2019 06:19:33 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Can-I-use-SparkSQL-on-a-cluster-using-Hive-on-Spark/m-p/282309#M209903</guid>
      <dc:creator>avengers</dc:creator>
      <dc:date>2019-11-07T06:19:33Z</dc:date>
    </item>
    <item>
      <title>Re: Can I use SparkSQL on a cluster using Hive on Spark?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Can-I-use-SparkSQL-on-a-cluster-using-Hive-on-Spark/m-p/282515#M210025</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/28768"&gt;@avengers&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;You will need to share variables between two Zeppelin interpreters, and I don't think that we can do it between spark and sparkSQL.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I found an easier way by using sqlContext inside the same interpreter %spark:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;%spark&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;val df = spark.read.format("csv").option("header", "true")&lt;BR /&gt;.option("inferSchema", "true").load("/somefile.csv")&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;df.createOrReplaceTempView("csvTable");&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;val resultat = sqlContext.sql("select * from csvTable lt join hiveTable rt on lt.col = rt.col")&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;resultat.show()&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I tried it and it works!&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Best,&lt;/P&gt;&lt;P&gt;Helmi KHALIFA&lt;/P&gt;</description>
      <pubDate>Fri, 08 Nov 2019 16:55:10 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Can-I-use-SparkSQL-on-a-cluster-using-Hive-on-Spark/m-p/282515#M210025</guid>
      <dc:creator>helmi_khalifa</dc:creator>
      <dc:date>2019-11-08T16:55:10Z</dc:date>
    </item>
    <item>
      <title>Re: Can I use SparkSQL on a cluster using Hive on Spark?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Can-I-use-SparkSQL-on-a-cluster-using-Hive-on-Spark/m-p/282964#M210316</link>
      <description>&lt;P&gt;hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/28768"&gt;@avengers&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;If it works for you, would you be kind enough to accept the answer please ?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Best,&lt;/P&gt;&lt;P&gt;Helmi KHALIFA&lt;/P&gt;</description>
      <pubDate>Thu, 14 Nov 2019 10:42:29 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Can-I-use-SparkSQL-on-a-cluster-using-Hive-on-Spark/m-p/282964#M210316</guid>
      <dc:creator>helmi_khalifa</dc:creator>
      <dc:date>2019-11-14T10:42:29Z</dc:date>
    </item>
    <item>
      <title>Re: Can I use SparkSQL on a cluster using Hive on Spark?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Can-I-use-SparkSQL-on-a-cluster-using-Hive-on-Spark/m-p/282967#M210319</link>
      <description>&lt;P&gt;Hey &lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/28768"&gt;@avengers&lt;/a&gt;,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Just thought, this could add some more value to this question here.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Spark SQL uses a Hive Metastore to manage the metadata of persistent relational entities (e.g. databases, tables, columns, partitions) in a relational database (for fast access) [1].&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Also, I don't think there would be a MetaStore crash if we use it along with HiveOnSpark.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;[1] &lt;A href="https://jaceklaskowski.gitbooks.io/mastering-spark-sql/spark-sql-hive-metastore.html" target="_blank"&gt;https://jaceklaskowski.gitbooks.io/mastering-spark-sql/spark-sql-hive-metastore.html&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 14 Nov 2019 10:54:24 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Can-I-use-SparkSQL-on-a-cluster-using-Hive-on-Spark/m-p/282967#M210319</guid>
      <dc:creator>gsthina</dc:creator>
      <dc:date>2019-11-14T10:54:24Z</dc:date>
    </item>
  </channel>
</rss>

