Support Questions

Find answers, ask questions, and share your expertise

HIve UDTF not working

avatar
New Contributor

Hi ,

 

I have created a UDTF , that should take string(comma or space seperated) and should return them in multiple records . 

i.e.

input1 : "this,is,me"

Output1 : this

is

me

input2 : "this is   me"

Output2 : this

is

me

 

But I am not getting the output from UDTF . I have gone through all the related posts but nothing worked out.

 

Please advise.

 

Note : I am creating the jar, creating temp function and then quering like select fun(string).

 

Please find below the code snippet :

 

package obj.udf;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
public class ParseString extends GenericUDF {
private PrimitiveObjectInspector stringOI = null;
//Collector collector = null;
@Override

public StructObjectInspector initialize(ObjectInspector[] args)
throws UDFArgumentException {
if (args.length != 1) {
throw new UDFArgumentException(
"Only one argument is allowed");
}
if (args[0].getCategory() != ObjectInspector.Category.PRIMITIVE
&& ((PrimitiveObjectInspector) args[0]).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
throw new UDFArgumentException(
"Only Primitive types are accepted");
}
// input inspectors
stringOI = (PrimitiveObjectInspector) args[0];
// output inspectors -- an object with three fields!
List<String> fieldName = new ArrayList<String>(1);
List<ObjectInspector> fieldOI = new ArrayList<ObjectInspector>(1);
fieldName.add("selection");
fieldOI.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
return ObjectInspectorFactory.getStandardStructObjectInspector(
fieldName, fieldOI);
}

public ArrayList<Object[]> processInputRecord(String selection) {
ArrayList<Object[]> result = new ArrayList<Object[]>();
// ignoring null or empty input
if (selection == null || selection.isEmpty()) {
return result;
}

String[] tokens = null;

if(selection.contains(",")) {
tokens=selection.split(",");
}
if(selection.contains(" ")) {
tokens=selection.split("\\s+");
}

for(String s : tokens) {
if(!s.isEmpty()) {
result.add(new Object[] {s});
}
}
return result;
}
public void process(Object[] record) throws HiveException {
final String selection = stringOI.getPrimitiveJavaObject(record[0]).toString();
ArrayList<Object[]> results = processInputRecord(selection);
Iterator<Object[]> it = results.iterator();
while (it.hasNext()) {
Object[] r = it.next();
forward(r);
}
}
private void forward(Object[] r) {
// TODO Auto-generated method stub

}

public void close() throws HiveException {
// do nothing
}

@Override
public Object evaluate(DeferredObject[] arg0) throws HiveException {
// TODO Auto-generated method stub
return null;
}

@Override
public String getDisplayString(String[] arg0) {
// TODO Auto-generated method stub
return null;
}
}

1 REPLY 1

avatar
Explorer

try it by removing the evaluate(),forward() and getDisplay() methods.

 

Also , extend from class GenericUDTF, if you are writing a UDTF.

 

hope this helps.Refer to below working code.

 

package org.kp.atg;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

 

public class GenericWhereUDTF extends GenericUDTF {
private PrimitiveObjectInspector stringOI = null;

@Override
public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {

if (args.length != 1) {
throw new UDFArgumentException("GenericWhereUDTF() takes exactly one argument");
}
if (args[0].getCategory() != ObjectInspector.Category.PRIMITIVE && ((PrimitiveObjectInspector) args[0]).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
throw new UDFArgumentException("GenericWhereUDTF() takes a string as a parameter");
}
// input inspectors
stringOI = (PrimitiveObjectInspector) args[0];

// output inspectors -- an object with three fields!
List<String> fieldNames = new ArrayList<String>(2);
List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(2);
fieldNames.add("id");
fieldNames.add("loc_number");
fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);

return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}

public ArrayList<Object[]> processInputRecord(String id) {
ArrayList<Object[]> result = new ArrayList<Object[]>();
// ignoring null or empty input
if (id == null || id.isEmpty()) {
return result;
}
String[] tokens = id.split("\\s+");
if (tokens.length == 2) {
result.add(new Object[] { tokens[0], tokens[1] });

} else if (tokens.length == 3) {
result.add(new Object[] { tokens[0], tokens[1] });
result.add(new Object[] { tokens[0], tokens[2] });
}

return result;
}

@Override
public void close() throws HiveException {
// TODO Auto-generated method stub

}

@Override
public void process(Object[] record) throws HiveException {
// final String name = stringOI.getPrimitiveJavaObject(record[0]).toString();
//ArrayList<Object[]> results = processInputRecord(name);
ArrayList<Object[]> results = new ArrayList<Object[]>();

results.add(new Object[] { "123", "value1" });
results.add(new Object[] { "111", "value2" });
results.add(new Object[] { "111", "value3" });

Iterator<Object[]> it = results.iterator();

while (it.hasNext()) {
Object[] r = it.next();
forward(r);
}
}

}