Support Questions
Find answers, ask questions, and share your expertise

Hive generic UDTF fails with array index out of bound error

Hive generic UDTF fails with array index out of bound error

New Contributor

This is regarding Hive generic UDTF.

The program's purpose is takes one string column as input and output should be multiple rows after splitting the input column(string) by space. Have generated the jar file and added the jar in hive shell , also created temporary function for the class name. while calling the function gtting array index out of bound error.

Code :

 

package com.suba.customHiveUdfs;

import java.util.ArrayList;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
public class MyUdtf extends GenericUDTF {
    ArrayList<String> colList = new ArrayList<>(1);
    ArrayList<ObjectInspector> oiList = new ArrayList<>(1);
    PrimitiveObjectInspector poi = null;
    @Override
    public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
        // TODO Auto-generated method stub
        if (argOIs.length > 1) {
            throw new UDFArgumentException("invalid argument");
        }
        if (argOIs[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
            throw new UDFArgumentException("primitive expected");
        }
        if (((PrimitiveObjectInspector) argOIs[0])
                .getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
            throw new UDFArgumentException("not string type");
        }        poi = (PrimitiveObjectInspector) argOIs[0];        colList.add("name");        oiList.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(colList, oiList);
    }
    @Override
    public void process(Object[] arg0) throws HiveException {
        String name = ((PrimitiveObjectInspector) poi).getPrimitiveJavaObject(arg0[0]).toString();
        String[] tokens = name.split(" ");
        for (String x : tokens) {
            Object[] objects = new Object[] { x };            forward(objects);
        }
    }
    @Override
    public void close() throws HiveException {
    }
}

Caused by: java.lang.ArrayIndexOutOfBoundsException: 1    at java.util.Arrays$ArrayList.get(Arrays.java:3841)    at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serialize(LazySimpleSerDe.java:417)    at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:592)    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:796)    at org.apache.hadoop.hive.ql.exec.UDTFOperator.forwardUDTFOutput(UDTFOperator.java:125)    at org.apache.hadoop.hive.ql.udf.generic.UDTFCollector.collect(UDTFCollector.java:45)    at org.apache.hadoop.hive.ql.udf.generic.GenericUDTF.forward(GenericUDTF.java:107)    at com.suba.customHiveUdfs.MyUdtf.process(MyUdtf.java:61)    at org.apache.hadoop.hive.ql.exec.UDTFOperator.processOp(UDTFOperator.java:108)    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:796)    at org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:87)    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:796)    at org.apache.hadoop.hive.ql.exec.TableScanOperator.processOp(TableScanOperator.java:92)    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:796)    at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:539)
    ... 9 more


The error message shown below: Getting array index out of bound error.

Caused by: java.lang.ArrayIndexOutOfBoundsException: 1    at java.util.Arrays$ArrayList.get(Arrays.java:3841)    at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serialize(LazySimpleSerDe.java:417)    at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:592)    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:796)    at org.apache.hadoop.hive.ql.exec.UDTFOperator.forwardUDTFOutput(UDTFOperator.java:125)    at org.apache.hadoop.hive.ql.udf.generic.UDTFCollector.collect(UDTFCollector.java:45)    at org.apache.hadoop.hive.ql.udf.generic.GenericUDTF.forward(GenericUDTF.java:107)    at com.suba.customHiveUdfs.MyUdtf.process(MyUdtf.java:61)    at org.apache.hadoop.hive.ql.exec.UDTFOperator.processOp(UDTFOperator.java:108)    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:796)    at org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:87)    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:796)    at org.apache.hadoop.hive.ql.exec.TableScanOperator.processOp(TableScanOperator.java:92)    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:796)    at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:539)
    ... 9 more