Created on 03-21-2018 10:57 PM - edited 09-16-2022 06:00 AM
Hi ,
I have created a UDTF , that should take string(comma or space seperated) and should return them in multiple records .
i.e.
input1 : "this,is,me"
Output1 : this
is
me
input2 : "this is me"
Output2 : this
is
me
But I am not getting the output from UDTF . I have gone through all the related posts but nothing worked out.
Please advise.
Note : I am creating the jar, creating temp function and then quering like select fun(string).
Please find below the code snippet :
package obj.udf;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
public class ParseString extends GenericUDF {
private PrimitiveObjectInspector stringOI = null;
//Collector collector = null;
@Override
public StructObjectInspector initialize(ObjectInspector[] args)
throws UDFArgumentException {
if (args.length != 1) {
throw new UDFArgumentException(
"Only one argument is allowed");
}
if (args[0].getCategory() != ObjectInspector.Category.PRIMITIVE
&& ((PrimitiveObjectInspector) args[0]).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
throw new UDFArgumentException(
"Only Primitive types are accepted");
}
// input inspectors
stringOI = (PrimitiveObjectInspector) args[0];
// output inspectors -- an object with three fields!
List<String> fieldName = new ArrayList<String>(1);
List<ObjectInspector> fieldOI = new ArrayList<ObjectInspector>(1);
fieldName.add("selection");
fieldOI.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
return ObjectInspectorFactory.getStandardStructObjectInspector(
fieldName, fieldOI);
}
public ArrayList<Object[]> processInputRecord(String selection) {
ArrayList<Object[]> result = new ArrayList<Object[]>();
// ignoring null or empty input
if (selection == null || selection.isEmpty()) {
return result;
}
String[] tokens = null;
if(selection.contains(",")) {
tokens=selection.split(",");
}
if(selection.contains(" ")) {
tokens=selection.split("\\s+");
}
for(String s : tokens) {
if(!s.isEmpty()) {
result.add(new Object[] {s});
}
}
return result;
}
public void process(Object[] record) throws HiveException {
final String selection = stringOI.getPrimitiveJavaObject(record[0]).toString();
ArrayList<Object[]> results = processInputRecord(selection);
Iterator<Object[]> it = results.iterator();
while (it.hasNext()) {
Object[] r = it.next();
forward(r);
}
}
private void forward(Object[] r) {
// TODO Auto-generated method stub
}
public void close() throws HiveException {
// do nothing
}
@Override
public Object evaluate(DeferredObject[] arg0) throws HiveException {
// TODO Auto-generated method stub
return null;
}
@Override
public String getDisplayString(String[] arg0) {
// TODO Auto-generated method stub
return null;
}
}
Created on 07-16-2019 12:06 PM - edited 07-16-2019 12:14 PM
try it by removing the evaluate(),forward() and getDisplay() methods.
Also , extend from class GenericUDTF, if you are writing a UDTF.
hope this helps.Refer to below working code.
package org.kp.atg;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
public class GenericWhereUDTF extends GenericUDTF {
private PrimitiveObjectInspector stringOI = null;
@Override
public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
if (args.length != 1) {
throw new UDFArgumentException("GenericWhereUDTF() takes exactly one argument");
}
if (args[0].getCategory() != ObjectInspector.Category.PRIMITIVE && ((PrimitiveObjectInspector) args[0]).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
throw new UDFArgumentException("GenericWhereUDTF() takes a string as a parameter");
}
// input inspectors
stringOI = (PrimitiveObjectInspector) args[0];
// output inspectors -- an object with three fields!
List<String> fieldNames = new ArrayList<String>(2);
List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(2);
fieldNames.add("id");
fieldNames.add("loc_number");
fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}
public ArrayList<Object[]> processInputRecord(String id) {
ArrayList<Object[]> result = new ArrayList<Object[]>();
// ignoring null or empty input
if (id == null || id.isEmpty()) {
return result;
}
String[] tokens = id.split("\\s+");
if (tokens.length == 2) {
result.add(new Object[] { tokens[0], tokens[1] });
} else if (tokens.length == 3) {
result.add(new Object[] { tokens[0], tokens[1] });
result.add(new Object[] { tokens[0], tokens[2] });
}
return result;
}
@Override
public void close() throws HiveException {
// TODO Auto-generated method stub
}
@Override
public void process(Object[] record) throws HiveException {
// final String name = stringOI.getPrimitiveJavaObject(record[0]).toString();
//ArrayList<Object[]> results = processInputRecord(name);
ArrayList<Object[]> results = new ArrayList<Object[]>();
results.add(new Object[] { "123", "value1" });
results.add(new Object[] { "111", "value2" });
results.add(new Object[] { "111", "value3" });
Iterator<Object[]> it = results.iterator();
while (it.hasNext()) {
Object[] r = it.next();
forward(r);
}
}
}