Created on 02-13-2018 03:16 AM - edited 02-13-2018 04:11 AM
Hi All,
I want to run sql query like select * from table where username = 'xyz' inside an UDF and get the results.Username 'xyz' will be passed as an argument to the function.Is this possible with an UDF.Can any one let me know how to do this in UDF .We are using Impala and requirement is to create a UDF for this instead of going for joins.
Thanks in advance
Created 02-15-2018 02:25 AM
I am able to achieve it in udf.
Thank You.
Created 02-22-2018 07:46 PM
Could you share how you achieve it?
Created on 02-23-2018 01:44 AM - edited 02-23-2018 01:48 AM
please find the code.UDF will take username as input and gives customer details to which user has access.
You can excute in hive or impala just chaning the connection string with different ports.
import java.io.PrintWriter;
import java.io.StringWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;
import org.apache.hive.jdbc.HiveDriver;
import com.cloudera.impala.jdbc41.Driver;
public class GetData extends UDF {
Text t = new Text();
StringWriter sw = new StringWriter();
PrintWriter pw = new PrintWriter(sw);
private static final String IMPALA_HOST = "";//server name on which impala is running
private static final String IMPALA_JDBC_PORT = "21050";
// Define a string as the connection URL
//public static final String IMPALA_CONNECTION_URL = "jdbc:hive2://servername:10000/;";
public static final String IMPALA_CONNECTION_URL = "jdbc:impala://" + IMPALA_HOST + ":" + IMPALA_JDBC_PORT;
//public static final String JDBC_DRIVER_NAME = "org.apache.hive.jdbc.HiveDriver";
public static final String JDBC_DRIVER_NAME = "com.cloudera.impala.jdbc41.Driver";
public Text evaluate(Text input){
String PREP_QUERY = "SELECT customer from GetCustomer where userid ='"+input+"'";
String customer="";
Connection connection = null;
Statement statement = null;
ResultSet resultSet = null;
String strack1="";
String strack2="";
String strack3="";
try {
// Register the jdbc driver here
Class.forName(JDBC_DRIVER_NAME);
// Ensure that the user provided is present in the cluster/sandbox
connection = DriverManager.getConnection(IMPALA_CONNECTION_URL);
statement = connection.createStatement();
System.out.println("Query: " + PREP_QUERY);
// Execute and measure the execution time for query
//long startTime = System.currentTimeMillis();
resultSet = statement.executeQuery(PREP_QUERY);
//customer="Hello";
//customer = resultSet.getString(1);
//System.out.println("cust"+customer);
// Print the result in console
ResultSetMetaData rsmd = resultSet.getMetaData();
int columnsNumber = rsmd.getColumnCount();
int counter = 0;
while (resultSet.next()) {
counter++;
for (int i = 1; i <= columnsNumber; i++) {
if (i > 1)
//System.out.print(", ");
System.out.println("");
customer = resultSet.getString(i);
//System.out.print(rsmd.getColumnName(i) + ":" + customer);
}
//System.out.println("");
}
//long endTime = System.currentTimeMillis();
// Print query execution time
//System.out.println(String.format("\t %d found in %d ms", counter, (endTime - startTime)));
}
catch(SQLException e){
// Handle errors that are encountered during the interaction with the data store
e.printStackTrace(pw);
strack3=sw.toString();
}
catch (Exception e) {
// Handle other errors
e.printStackTrace(pw);
strack2=sw.toString();
}finally {
try {
if (resultSet!= null) {
resultSet.close();
}
}catch (Exception s1) {
s1.printStackTrace(pw);
strack1=sw.toString();
}
try {
if (statement != null) {
statement.close();
}
} catch (SQLException s2) {
s2.printStackTrace(pw);
}
try {
if (connection != null) {
connection.close();
}
} catch (SQLException s3) {
s3.printStackTrace(pw);
} // End try
}
//System.out.println(customer);
t.set(StringUtils.strip(customer.toString()));
//System.out.println(t);
return t;
}
}
Created 02-25-2018 10:28 PM
It's great job. Thank you.