New Contributor
Posts: 5
Registered: ‎02-13-2018

SQL Queries in UDF with impala

[ Edited ]

Hi All,


I want to run sql query  like select * from table where username = 'xyz' inside an UDF and get the results.Username  'xyz' will be passed as an argument to the function.Is this possible with an UDF.Can any one let me know how to do this in UDF .We are using Impala and requirement is to create a UDF for this instead of going for joins.


Thanks in advance

New Contributor
Posts: 5
Registered: ‎02-13-2018

Re: SQL Queries in UDF with impala

I am able to achieve it in udf.


Thank You.

New Contributor
Posts: 1
Registered: ‎02-22-2018

Re: SQL Queries in UDF with impala

Could you share how you achieve it?

New Contributor
Posts: 5
Registered: ‎02-13-2018

Re: SQL Queries in UDF with impala

[ Edited ]

please find the code.UDF will take username as input and gives customer details to which user has access.

You can excute in hive or impala just chaning the connection string with different ports.


import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hive.jdbc.HiveDriver;

import com.cloudera.impala.jdbc41.Driver;

public class GetData extends UDF {
    Text t = new Text();
    StringWriter sw = new StringWriter();
    PrintWriter pw = new PrintWriter(sw);
    private static final String IMPALA_HOST = "";//server name on which impala is running
    private static final String IMPALA_JDBC_PORT = "21050";

    //     Define  a    string     as the     connection     URL
    //public static final String IMPALA_CONNECTION_URL = "jdbc:hive2://servername:10000/;";
    public static final String IMPALA_CONNECTION_URL = "jdbc:impala://" + IMPALA_HOST + ":" + IMPALA_JDBC_PORT;
    //public static final String JDBC_DRIVER_NAME = "org.apache.hive.jdbc.HiveDriver";
    public static final String JDBC_DRIVER_NAME = "com.cloudera.impala.jdbc41.Driver";
    public Text evaluate(Text input){
    String PREP_QUERY = "SELECT  customer from GetCustomer where userid ='"+input+"'";
    String customer="";

    Connection connection = null;
    Statement statement = null;
    ResultSet resultSet = null;
    String strack1="";
    String strack2="";
    String strack3="";

    try {
        // Register the jdbc driver here

        // Ensure that the user provided is present in the cluster/sandbox
        connection = DriverManager.getConnection(IMPALA_CONNECTION_URL);

        statement = connection.createStatement();

        System.out.println("Query: " + PREP_QUERY);

        // Execute and measure the execution time for query
        //long startTime = System.currentTimeMillis();
        resultSet = statement.executeQuery(PREP_QUERY);
        //customer = resultSet.getString(1);
        // Print the result in console
        ResultSetMetaData rsmd = resultSet.getMetaData();
        int columnsNumber = rsmd.getColumnCount();
        int counter = 0;
        while ( {
            for (int i = 1; i <= columnsNumber; i++) {
                if (i > 1)
                    //System.out.print(",  ");
                customer = resultSet.getString(i);
                //System.out.print(rsmd.getColumnName(i) + ":" + customer);

        //long endTime = System.currentTimeMillis();

        // Print query execution time
        //System.out.println(String.format("\t %d found in %d ms", counter, (endTime - startTime)));
    catch(SQLException e){
        // Handle errors that are encountered during the interaction with the data store
    catch (Exception e) {
        // Handle other errors
    }finally {
        try {
            if (resultSet!= null) {
        }catch (Exception s1) {
        try {
            if (statement != null) {
        } catch (SQLException s2) {
        try {
            if (connection != null) {
        } catch (SQLException s3) {
        } // End try
    return t;