
Hive UDTF not working

New Contributor

Hi,

 

I have created a UDTF that should take a string (comma- or space-separated) and return the tokens as multiple records.

For example:

Input 1: "this,is,me"

Output 1:
this
is
me

Input 2: "this is   me"

Output 2:
this
is
me

 

But I am not getting any output from the UDTF. I have gone through all the related posts, but nothing has worked.

 

Please advise.

 

Note: I am building the jar, creating a temporary function, and then querying it like select fun(string).
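
For reference, the registration and query sequence I am using looks roughly like this (the jar path and table/column names below are placeholders, not my actual ones):

ADD JAR /path/to/parse-string-udtf.jar;        -- placeholder jar path
CREATE TEMPORARY FUNCTION fun AS 'obj.udf.ParseString';
SELECT fun(some_column) FROM some_table;       -- placeholder table and column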

 

Please find the code snippet below:

 

package obj.udf;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class ParseString extends GenericUDF {

    private PrimitiveObjectInspector stringOI = null;
    //Collector collector = null;

    @Override
    public StructObjectInspector initialize(ObjectInspector[] args)
            throws UDFArgumentException {
        if (args.length != 1) {
            throw new UDFArgumentException("Only one argument is allowed");
        }
        if (args[0].getCategory() != ObjectInspector.Category.PRIMITIVE
                && ((PrimitiveObjectInspector) args[0]).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
            throw new UDFArgumentException("Only Primitive types are accepted");
        }
        // input inspectors
        stringOI = (PrimitiveObjectInspector) args[0];
        // output inspectors -- an object with three fields!
        List<String> fieldName = new ArrayList<String>(1);
        List<ObjectInspector> fieldOI = new ArrayList<ObjectInspector>(1);
        fieldName.add("selection");
        fieldOI.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldName, fieldOI);
    }

    public ArrayList<Object[]> processInputRecord(String selection) {
        ArrayList<Object[]> result = new ArrayList<Object[]>();
        // ignoring null or empty input
        if (selection == null || selection.isEmpty()) {
            return result;
        }

        String[] tokens = null;

        if (selection.contains(",")) {
            tokens = selection.split(",");
        }
        if (selection.contains(" ")) {
            tokens = selection.split("\\s+");
        }

        for (String s : tokens) {
            if (!s.isEmpty()) {
                result.add(new Object[] { s });
            }
        }
        return result;
    }

    public void process(Object[] record) throws HiveException {
        final String selection = stringOI.getPrimitiveJavaObject(record[0]).toString();
        ArrayList<Object[]> results = processInputRecord(selection);
        Iterator<Object[]> it = results.iterator();
        while (it.hasNext()) {
            Object[] r = it.next();
            forward(r);
        }
    }

    private void forward(Object[] r) {
        // TODO Auto-generated method stub
    }

    public void close() throws HiveException {
        // do nothing
    }

    @Override
    public Object evaluate(DeferredObject[] arg0) throws HiveException {
        // TODO Auto-generated method stub
        return null;
    }

    @Override
    public String getDisplayString(String[] arg0) {
        // TODO Auto-generated method stub
        return null;
    }
}

1 REPLY

Re: Hive UDTF not working

New Contributor

Try removing the evaluate(), forward(), and getDisplayString() methods.

 

Also, extend the GenericUDTF class if you are writing a UDTF (your class currently extends GenericUDF).

 

Hope this helps. Refer to the working code below.

 

package org.kp.atg;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class GenericWhereUDTF extends GenericUDTF {

    private PrimitiveObjectInspector stringOI = null;

    @Override
    public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
        if (args.length != 1) {
            throw new UDFArgumentException("GenericWhereUDTF() takes exactly one argument");
        }
        // reject anything that is not a primitive string
        if (args[0].getCategory() != ObjectInspector.Category.PRIMITIVE
                || ((PrimitiveObjectInspector) args[0]).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
            throw new UDFArgumentException("GenericWhereUDTF() takes a string as a parameter");
        }
        // input inspector
        stringOI = (PrimitiveObjectInspector) args[0];

        // output inspectors -- a struct with two string fields
        List<String> fieldNames = new ArrayList<String>(2);
        List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(2);
        fieldNames.add("id");
        fieldNames.add("loc_number");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);

        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    public ArrayList<Object[]> processInputRecord(String id) {
        ArrayList<Object[]> result = new ArrayList<Object[]>();
        // ignoring null or empty input
        if (id == null || id.isEmpty()) {
            return result;
        }
        String[] tokens = id.split("\\s+");
        if (tokens.length == 2) {
            result.add(new Object[] { tokens[0], tokens[1] });
        } else if (tokens.length == 3) {
            result.add(new Object[] { tokens[0], tokens[1] });
            result.add(new Object[] { tokens[0], tokens[2] });
        }
        return result;
    }

    @Override
    public void close() throws HiveException {
        // nothing to clean up
    }

    @Override
    public void process(Object[] record) throws HiveException {
        // final String name = stringOI.getPrimitiveJavaObject(record[0]).toString();
        // ArrayList<Object[]> results = processInputRecord(name);
        ArrayList<Object[]> results = new ArrayList<Object[]>();

        results.add(new Object[] { "123", "value1" });
        results.add(new Object[] { "111", "value2" });
        results.add(new Object[] { "111", "value3" });

        Iterator<Object[]> it = results.iterator();
        while (it.hasNext()) {
            Object[] r = it.next();
            forward(r);
        }
    }
}
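
Once that class is packaged into a jar, it can be registered and invoked roughly like below (the jar path, function name, table, and column names are just examples, not fixed names):

ADD JAR /path/to/generic-where-udtf.jar;    -- example jar path
CREATE TEMPORARY FUNCTION where_udtf AS 'org.kp.atg.GenericWhereUDTF';

-- a UDTF can be selected on its own:
SELECT where_udtf(id_col) FROM some_table;

-- or expanded alongside the source rows with LATERAL VIEW:
SELECT t.id, t.loc_number
FROM some_table
LATERAL VIEW where_udtf(id_col) t AS id, loc_number;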
