Support Questions

Find answers, ask questions, and share your expertise

Who agreed with this topic

HIve UDTF not working

avatar
New Contributor

Hi ,

 

I have created a UDTF , that should take string(comma or space seperated) and should return them in multiple records . 

i.e.

input1 : "this,is,me"

Output1 : this

is

me

input2 : "this is   me"

Output2 : this

is

me

 

But I am not getting the output from UDTF . I have gone through all the related posts but nothing worked out.

 

Please advise.

 

Note : I am creating the jar, creating temp function and then quering like select fun(string).

 

Please find below the code snippet :

 

package obj.udf;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
public class ParseString extends GenericUDF {
private PrimitiveObjectInspector stringOI = null;
//Collector collector = null;
@Override

public StructObjectInspector initialize(ObjectInspector[] args)
throws UDFArgumentException {
if (args.length != 1) {
throw new UDFArgumentException(
"Only one argument is allowed");
}
if (args[0].getCategory() != ObjectInspector.Category.PRIMITIVE
&& ((PrimitiveObjectInspector) args[0]).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
throw new UDFArgumentException(
"Only Primitive types are accepted");
}
// input inspectors
stringOI = (PrimitiveObjectInspector) args[0];
// output inspectors -- an object with three fields!
List<String> fieldName = new ArrayList<String>(1);
List<ObjectInspector> fieldOI = new ArrayList<ObjectInspector>(1);
fieldName.add("selection");
fieldOI.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
return ObjectInspectorFactory.getStandardStructObjectInspector(
fieldName, fieldOI);
}

public ArrayList<Object[]> processInputRecord(String selection) {
ArrayList<Object[]> result = new ArrayList<Object[]>();
// ignoring null or empty input
if (selection == null || selection.isEmpty()) {
return result;
}

String[] tokens = null;

if(selection.contains(",")) {
tokens=selection.split(",");
}
if(selection.contains(" ")) {
tokens=selection.split("\\s+");
}

for(String s : tokens) {
if(!s.isEmpty()) {
result.add(new Object[] {s});
}
}
return result;
}
public void process(Object[] record) throws HiveException {
final String selection = stringOI.getPrimitiveJavaObject(record[0]).toString();
ArrayList<Object[]> results = processInputRecord(selection);
Iterator<Object[]> it = results.iterator();
while (it.hasNext()) {
Object[] r = it.next();
forward(r);
}
}
private void forward(Object[] r) {
// TODO Auto-generated method stub

}

public void close() throws HiveException {
// do nothing
}

@Override
public Object evaluate(DeferredObject[] arg0) throws HiveException {
// TODO Auto-generated method stub
return null;
}

@Override
public String getDisplayString(String[] arg0) {
// TODO Auto-generated method stub
return null;
}
}

Who agreed with this topic