<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Spark3 connection to HIVE ACID Tables in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Spark3-connection-to-HIVE-ACID-Tables/m-p/348833#M235462</link>
    <description>&lt;P&gt;&lt;STRONG&gt;Hi guys,&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;I have a &lt;STRONG&gt;Data lake (Hive Managed tables base)&lt;/STRONG&gt; and I would like to do an Incremental approach to the &lt;STRONG&gt;Warehouse (Hive Managed tables base)&lt;/STRONG&gt; using spark v3.2, and I faced an issue connecting to Hive Managed tables with spark3.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;And I would like to know,&lt;/P&gt;&lt;P&gt;How do I connect to Hive ACID Tables? Using JDBC? If yes, how? Or are there other ways?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;Thank you!&lt;/STRONG&gt;&lt;/P&gt;</description>
    <pubDate>Tue, 26 Jul 2022 16:32:27 GMT</pubDate>
    <dc:creator>Asim-</dc:creator>
    <dc:date>2022-07-26T16:32:27Z</dc:date>
    <item>
      <title>Spark3 connection to HIVE ACID Tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Spark3-connection-to-HIVE-ACID-Tables/m-p/348833#M235462</link>
      <description>&lt;P&gt;&lt;STRONG&gt;Hi guys,&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;I have a &lt;STRONG&gt;Data lake (Hive Managed tables base)&lt;/STRONG&gt; and I would like to do an Incremental approach to the &lt;STRONG&gt;Warehouse (Hive Managed tables base)&lt;/STRONG&gt; using spark v3.2, and I faced an issue connecting to Hive Managed tables with spark3.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;And I would like to know,&lt;/P&gt;&lt;P&gt;How do I connect to Hive ACID Tables? Using JDBC? If yes, how? Or are there other ways?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;Thank you!&lt;/STRONG&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 26 Jul 2022 16:32:27 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Spark3-connection-to-HIVE-ACID-Tables/m-p/348833#M235462</guid>
      <dc:creator>Asim-</dc:creator>
      <dc:date>2022-07-26T16:32:27Z</dc:date>
    </item>
    <item>
      <title>Re: Spark3 connection to HIVE ACID Tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Spark3-connection-to-HIVE-ACID-Tables/m-p/349139#M235534</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/96215"&gt;@Asim-&lt;/a&gt; &lt;SPAN&gt;Hive Warehouse Connector (HWC)&lt;/SPAN&gt;&lt;SPAN&gt;&amp;nbsp;securely accesses Hive-managed (ACID Tables) from Spark. You need to use HWC software to query Apache Hive-managed tables from Apache Spark. &lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;As of now, HWC supports Spark2&amp;nbsp;in CDP 7.1.7. &amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;HWC is not yet a supported feature for Spark3.2 / CDS 3.2 in CDP 7.1.7. &lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;We are expecting HWC for Spark3 to be included in our upcoming CDS 3.3 in CDP 7.1.8.&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 30 Jul 2022 02:50:11 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Spark3-connection-to-HIVE-ACID-Tables/m-p/349139#M235534</guid>
      <dc:creator>jagadeesan</dc:creator>
      <dc:date>2022-07-30T02:50:11Z</dc:date>
    </item>
    <item>
      <title>Re: Spark3 connection to HIVE ACID Tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Spark3-connection-to-HIVE-ACID-Tables/m-p/349142#M235537</link>
      <description>&lt;P&gt;Thank you&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/67146"&gt;@jagadeesan&lt;/a&gt;&amp;nbsp;for your reply,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;As far as I know, HWC does not support INSERT/UPDATE in Hive ACID Tables,&lt;/P&gt;&lt;P&gt;Correct me if I'm wrong.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Also is there any way to connect to Hive ACID tables now for spark 3&lt;/P&gt;&lt;P&gt;instead of HWC.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you!&lt;/P&gt;</description>
      <pubDate>Sat, 30 Jul 2022 10:09:33 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Spark3-connection-to-HIVE-ACID-Tables/m-p/349142#M235537</guid>
      <dc:creator>Asim-</dc:creator>
      <dc:date>2022-07-30T10:09:33Z</dc:date>
    </item>
    <item>
      <title>Re: Spark3 connection to HIVE ACID Tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Spark3-connection-to-HIVE-ACID-Tables/m-p/349154#M235542</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/96215"&gt;@Asim-&lt;/a&gt;&amp;nbsp; Run CREATE, UPDATE, DELETE, INSERT, and MERGE statements in this way:&lt;/P&gt;&lt;PRE&gt;hive.executeUpdate("INSERT INTO table_name (column1, column2,...) VALUES (value1, value2,...)")&lt;/PRE&gt;&lt;P&gt;For more details, you can refer to HWC&amp;nbsp;Read and write operations &lt;A href="https://docs.cloudera.com/cdp-private-cloud-base/7.1.7/integrating-hive-and-bi/topics/hive-read-write-operations.html#pnavId2" target="_blank" rel="noopener"&gt;documentation&lt;/A&gt;.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Other than HWC, we don't have any other way to connect to Hive ACID tables&amp;nbsp;&lt;SPAN&gt;from Apache Spark, as mentioned earlier, we are expecting this feature to be released in our upcoming CDS 3.3 release.&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 30 Jul 2022 16:59:33 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Spark3-connection-to-HIVE-ACID-Tables/m-p/349154#M235542</guid>
      <dc:creator>jagadeesan</dc:creator>
      <dc:date>2022-07-30T16:59:33Z</dc:date>
    </item>
    <item>
      <title>Re: Spark3 connection to HIVE ACID Tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Spark3-connection-to-HIVE-ACID-Tables/m-p/349158#M235546</link>
      <description>&lt;P&gt;Thank you&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/67146"&gt;@jagadeesan&lt;/a&gt;,&lt;/P&gt;&lt;P&gt;But, is it possible to connect to the Hive via JDBC from spark 3.x ?&lt;/P&gt;</description>
      <pubDate>Sat, 30 Jul 2022 17:36:25 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Spark3-connection-to-HIVE-ACID-Tables/m-p/349158#M235546</guid>
      <dc:creator>Asim-</dc:creator>
      <dc:date>2022-07-30T17:36:25Z</dc:date>
    </item>
    <item>
      <title>Re: Spark3 connection to HIVE ACID Tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Spark3-connection-to-HIVE-ACID-Tables/m-p/349169#M235547</link>
      <description>&lt;P&gt;Yes, for more details you can refer &lt;A href="https://docs.cloudera.com/cdp-private-cloud-base/7.1.7/integrating-hive-and-bi/topics/hive_specify_the_jdbc_connection_string.html" target="_self"&gt;here&lt;/A&gt;.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 30 Jul 2022 23:30:30 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Spark3-connection-to-HIVE-ACID-Tables/m-p/349169#M235547</guid>
      <dc:creator>jagadeesan</dc:creator>
      <dc:date>2022-07-30T23:30:30Z</dc:date>
    </item>
    <item>
      <title>Re: Spark3 connection to HIVE ACID Tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Spark3-connection-to-HIVE-ACID-Tables/m-p/349308#M235610</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/96215"&gt;@Asim-&lt;/a&gt;,&amp;nbsp;Has the reply helped resolve your issue? If so, please mark the appropriate reply as the solution, as it will make it easier for others to find the answer in the future.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 02 Aug 2022 06:16:32 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Spark3-connection-to-HIVE-ACID-Tables/m-p/349308#M235610</guid>
      <dc:creator>VidyaSargur</dc:creator>
      <dc:date>2022-08-02T06:16:32Z</dc:date>
    </item>
    <item>
      <title>Re: Spark3 connection to HIVE ACID Tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Spark3-connection-to-HIVE-ACID-Tables/m-p/349374#M235631</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/67146"&gt;@jagadeesan&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;I am trying to connect to hive with spark3 via the JDBC Hive driver (HiveJDBC42)&lt;/P&gt;&lt;P&gt;And I am getting the below error:&lt;/P&gt;&lt;LI-CODE lang="java"&gt;import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("Spark - Hive").config("spark.sql.warehouse.dir", "/warehouse/tablespace/managed/hive").enableHiveSupport().getOrCreate()
val table_users = spark.read.format("jdbc"). 
              option("url","hive"). 
              option("url", "jdbc:hive2://127.0.0.1:2181:2181;password=****;principal=hive/_HOST@Example.com;serviceDiscoveryMode=zooKeeper;ssl=1;user=user1;zooKeeperNamespace=hiveserver2"). 
              option("driver","com.cloudera.hive.jdbc.HS2Driver"). 
              option("query","select * from test_db.users LIMIT 1").
              option("fetchsize","20"). 
              load()&lt;/LI-CODE&gt;&lt;LI-CODE lang="markup"&gt;java.sql.SQLException: [Cloudera][JDBC](11380) Null pointer exception.
  at com.cloudera.hiveserver2.hive.core.HiveJDBCConnection.setZookeeperServiceDiscovery(Unknown Source)
  at com.cloudera.hiveserver2.hive.core.HiveJDBCConnection.readServiceDiscoverySettings(Unknown Source)
  at com.cloudera.hiveserver2.hivecommon.core.HiveJDBCCommonConnection.readServiceDiscoverySettings(Unknown Source)
  at com.cloudera.hiveserver2.hivecommon.core.HiveJDBCCommonConnection.establishConnection(Unknown Source)
  at com.cloudera.hiveserver2.jdbc.core.LoginTimeoutConnection.connect(Unknown Source)
  at com.cloudera.hiveserver2.jdbc.common.BaseConnectionFactory.doConnect(Unknown Source)
  at com.cloudera.hiveserver2.jdbc.common.AbstractDriver.connect(Unknown Source)
  at org.apache.spark.sql.execution.datasources.jdbc.connection.BasicConnectionProvider.getConnection(BasicConnectionProvider.scala:49)
  at org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider$.create(ConnectionProvider.scala:77)
  at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.$anonfun$createConnectionFactory$1(JdbcUtils.scala:64)
  at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD$.getQueryOutputSchema(JDBCRDD.scala:62)
  at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD$.resolveTable(JDBCRDD.scala:57)
  at org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation$.getSchema(JDBCRelation.scala:239)
  at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.createRelation(JdbcRelationProvider.scala:36)
  at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:350)
  at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:274)
  at org.apache.spark.sql.DataFrameReader.$anonfun$load$3(DataFrameReader.scala:245)
  at scala.Option.getOrElse(Option.scala:189)
  at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:245)
  at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:174)
  ... 54 elided
Caused by: java.lang.NullPointerException&lt;/LI-CODE&gt;</description>
      <pubDate>Tue, 02 Aug 2022 14:24:46 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Spark3-connection-to-HIVE-ACID-Tables/m-p/349374#M235631</guid>
      <dc:creator>Asim-</dc:creator>
      <dc:date>2022-08-02T14:24:46Z</dc:date>
    </item>
    <item>
      <title>Re: Spark3 connection to HIVE ACID Tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Spark3-connection-to-HIVE-ACID-Tables/m-p/349390#M235636</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/96215"&gt;@Asim-&lt;/a&gt;&amp;nbsp;For JDBC you also need HWC for Managed tables. Here is the example for &lt;A href="https://docs.cloudera.com/cdp-private-cloud-base/7.1.7/integrating-hive-and-bi/topics/hive_configure_spark_hs2.html" target="_blank" rel="noopener"&gt;Spark2&lt;/A&gt;, but as mentioned earlier, for Spark3&amp;nbsp;&lt;SPAN&gt;we don't have any other way to connect to Hive ACID tables&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN&gt;from Apache Spark other than HWC and it&amp;nbsp;is not yet a supported feature for Spark3.2 / CDS 3.2 in CDP 7.1.7. Marking this thread closed; if you have any issues related to external tables, kindly start a new Support-Questions thread for better tracking of the issue and documentation. Thanks&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 03 Aug 2022 01:34:32 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Spark3-connection-to-HIVE-ACID-Tables/m-p/349390#M235636</guid>
      <dc:creator>jagadeesan</dc:creator>
      <dc:date>2022-08-03T01:34:32Z</dc:date>
    </item>
  </channel>
</rss>

