
Shark connectivity using Java and JDBC/other driver?

Explorer

Hi,

I want to run Shark queries from Java code.

Does anyone know how to connect to Shark from Java using JDBC or another driver?

 

Thanks,

Abhishek

1 ACCEPTED SOLUTION

Explorer

I got the solution.

I referred to https://cwiki.apache.org/confluence/display/Hive/HiveClient and made some changes.

Here's the catch:

 

You can use the same JDBC URL for connecting to Hive and Shark; only the port needs to change.

 

What I did:

1. I ran Hive on port 4544 and used the JDBC URL below in the Java class HiveJdbc.java:

Connection con = DriverManager.getConnection("jdbc:hive://localhost:4544/default", "", "");

 

2. I ran Shark on port 4588 and used the JDBC URL below in the Java class SharkJDBC.java:

Connection con = DriverManager.getConnection("jdbc:hive://localhost:4588/default", "", "");
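
(In case it helps, here's roughly how the two servers can be brought up on those ports. This is a sketch only: the -p flag and the $SHARK_HOME variable are assumptions here, so check the --service help for your Hive and Shark builds.)

# Sketch: start HiveServer1 on 4544 and SharkServer on 4588 in the background.
# -p and $SHARK_HOME are assumptions; some builds take the port via HIVE_PORT instead.
$HIVE_HOME/bin/hive --service hiveserver -p 4544 &
$SHARK_HOME/bin/shark --service sharkserver -p 4588 &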

 

 

The rest of the code is the same.

 

Here's the code:

----------------------------------------------------------------------

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

public class SharkJdbcClient {

    // Shark speaks the same Thrift protocol as HiveServer1, so the Hive JDBC driver works.
    private static String driverName = "org.apache.hadoop.hive.jdbc.HiveDriver";

    /**
     * @param args
     * @throws SQLException
     */
    public static void main(String[] args) throws SQLException {
        try {
            Class.forName(driverName);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
            System.exit(1);
        }

        // Same URL format as the Hive client; only the port (4588 = Shark here) differs.
        Connection con = DriverManager.getConnection("jdbc:hive://localhost:4588/default", "", "");
        Statement stmt = con.createStatement();

        String tableName = "bank_tab1_cached";
        System.out.println("Dropping the table: " + tableName);
        // NOTE: this old driver tolerates executeQuery() for DDL; some drivers require execute().
        stmt.executeQuery("drop table " + tableName);

        ResultSet res = stmt.executeQuery("create table " + tableName
                + " (empid int, name string) ROW FORMAT DELIMITED FIELDS TERMINATED BY \",\"");

        // show tables
        String sql = "show tables '" + tableName + "'";
        System.out.println("Running: " + sql);
        res = stmt.executeQuery(sql);
        if (res.next()) {
            System.out.println(res.getString(1));
        }

        // describe table
        sql = "describe " + tableName;
        System.out.println("Running: " + sql);
        res = stmt.executeQuery(sql);
        while (res.next()) {
            System.out.println(res.getString(1) + "-------" + res.getString(2));
        }

        // load data into the table
        // NOTE: the file path has to be local to the Hive/Shark server
        String filepath = "/home/abhi/Downloads/at_env_jar/emp_data.txt";
        sql = "load data local inpath '" + filepath + "' into table " + tableName;
        System.out.println("Running: " + sql);
        res = stmt.executeQuery(sql);

        // select * query
        sql = "select * from " + tableName;
        System.out.println("Running: " + sql);
        res = stmt.executeQuery(sql);
        while (res.next()) {
            System.out.println(res.getInt(1) + "\t" + res.getString(2));
        }

        // regular aggregate query
        sql = "select count(1) from " + tableName;
        System.out.println("Running: " + sql);
        res = stmt.executeQuery(sql);
        while (res.next()) {
            System.out.println(res.getString(1));
        }

        // One-row helper table used as the FROM clause of the INSERT below.
        String q1 = "CREATE TABLE one AS SELECT 1 AS one FROM " + tableName + " LIMIT 1";
        System.out.println("Running: " + q1);
        stmt.executeQuery(q1);

        // insert into table bank_tab1_cached SELECT stack(3, 1, "row1", 2, "row2", 3, "row3") AS (empid, name) FROM one;
        System.out.println("Inserting records.....");
        String q2 = "insert into table " + tableName
                + " SELECT stack(3, 1, \"row1\", 2, \"row2\", 3, \"row3\") AS (empid, name) FROM one";
        stmt.executeQuery(q2);
        System.out.println("Successfully inserted.");

        // Clean up the connection (res/stmt are left to the driver for brevity).
        con.close();
    }
}

----------------------------------------------------------------------

 

 

Here's the at.sh script used for running the code:

----------------------------------------------------------------------

#!/bin/bash
HADOOP_HOME="/usr/lib/hadoop"
HIVE_HOME="/home/abhi/Downloads/hive-0.9.0-bin"
  
HADOOP_CORE="/home/abhi/Downloads/at_env_jar/Hadoop4.1.1/hadoop-core-0.20.203.0.jar"
CLASSPATH=.:$HADOOP_HOME:$HADOOP_CORE:$HIVE_HOME:$HIVE_HOME/conf
 
for i in ${HIVE_HOME}/lib/*.jar ; do
    CLASSPATH=$CLASSPATH:$i
done
 
# The class name here must match the client you compiled
# (SharkJdbcClient above, or HiveJdbcClient for the Hive variant).
java -cp $CLASSPATH SharkJdbcClient

----------------------------------------------------------------------

 

Compile your Java code and run at.sh (with execute permission).
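
For completeness, that step can look something like this (a sketch; it assumes you export the same CLASSPATH that at.sh builds):

# Sketch: compile the client against the jars at.sh puts on the CLASSPATH,
# then run the script.
javac -cp $CLASSPATH SharkJdbcClient.java
chmod +x at.sh
./at.sh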

 

Cheers 🙂

Abhishek

 


4 REPLIES

Master Collaborator

It should be possible to use the Hive client to access Shark. https://cwiki.apache.org/confluence/display/Hive/HiveClient

 

I have not tried it myself, so maybe others can weigh in with better info, like which version of Hive is used with 0.9. I think it is Hive 0.11, from looking at the build.

Explorer

Thanks for the reply, srowen.

 

Here are the setup details:

Hive: hive-0.11.0-bin

Shark: shark-0.8.1-bin-cdh4

Spark: spark-0.9.0-incubating-bin-cdh4

 

Shark throws the error below when used with Hive 0.11.

Many users have commented that Hive comes in two variants, unpatched and patched.

I'm not sure where to get the patched and unpatched Hive versions.

 

 

Shark env.sh

#!/usr/bin/env bash
# (Required) Amount of memory used per slave node. This should be in the same
# format as the JVM's -Xmx option, e.g. 300m or 1g.
export SPARK_MEM=200m
export SPARK_HOME="/home/training/AT_Installation/spark-0.9.0-incubating-bin-cdh4"
# (Required) Set the master program's memory
export SHARK_MASTER_MEM=200m

# (Required) Point to your Scala installation.
export SCALA_HOME="/home/training/AT_Installation/scala-2.9.3"

# (Required) Point to the patched Hive binary distribution
export HIVE_HOME="/home/training/AT_Installation/hive-0.11.0-bin"
export HADOOP_HOME="/usr/lib/hadoop"
# (Optional) Specify the location of Hive's configuration directory. By default,
# it points to $HIVE_HOME/conf
#export HIVE_CONF_DIR="$HIVE_HOME/conf"

# For running Shark in distributed mode, set the following:
#export HADOOP_HOME=""
#export SPARK_HOME=""
#export MASTER=""
# Only required if using Mesos:
#export MESOS_NATIVE_LIBRARY=/usr/local/lib/libmesos.so

# Only required if run shark with spark on yarn
#export SHARK_EXEC_MODE=yarn
#export SPARK_ASSEMBLY_JAR=
#export SHARK_ASSEMBLY_JAR=

# (Optional) Extra classpath
#export SPARK_LIBRARY_PATH=""

# Java options
# On EC2, change the local.dir to /mnt/tmp
SPARK_JAVA_OPTS="-Dspark.local.dir=/tmp "
SPARK_JAVA_OPTS+="-Dspark.kryoserializer.buffer.mb=10 "
SPARK_JAVA_OPTS+="-verbose:gc -XX:-PrintGCDetails -XX:+PrintGCTimeStamps "
export SPARK_JAVA_OPTS

 

Error

14/03/25 06:23:23 INFO HiveMetaStore.audit: ugi=training ip=unknown-ip-addr cmd=get_tables: db=default pat=.* 
Exception in thread "main" java.lang.NoSuchMethodError: org.apache.hadoop.hive.cli.CliDriver.getCommandCompletor()Ljline/Completor;
 at shark.SharkCliDriver$.main(SharkCliDriver.scala:184)
 at shark.SharkCliDriver.main(SharkCliDriver.scala)

 

Thanks,

Abhishek

Master Collaborator

The error indicates that mismatched versions of Hive are being used. Not sure that helps; I am not familiar with Shark as a user myself.

 

If I'm not wrong, this isn't specific to the Cloudera distribution of Spark, so you may get better answers asking on the general Shark list.

 
