Support Questions
Find answers, ask questions, and share your expertise
Announcements
Alert: Welcome to the Unified Cloudera Community. Former HCC members be sure to read and learn how to activate your account here.

Hive generic UDTF fails with array index out of bound error

Hive generic UDTF fails with array index out of bound error

New Contributor

This is regarding Hive generic UDTF.

The program's purpose is takes one string column as input and output should be multiple rows after splitting the input column(string) by space. Have generated the jar file and added the jar in hive shell , also created temporary function for the class name. while calling the function gtting array index out of bound error.

Code :

 

package com.suba.customHiveUdfs;

import java.util.ArrayList;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
public class MyUdtf extends GenericUDTF {
    ArrayList<String> colList = new ArrayList<>(1);
    ArrayList<ObjectInspector> oiList = new ArrayList<>(1);
    PrimitiveObjectInspector poi = null;
    @Override
    public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
        // TODO Auto-generated method stub
        if (argOIs.length > 1) {
            throw new UDFArgumentException("invalid argument");
        }
        if (argOIs[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
            throw new UDFArgumentException("primitive expected");
        }
        if (((PrimitiveObjectInspector) argOIs[0])
                .getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
            throw new UDFArgumentException("not string type");
        }        poi = (PrimitiveObjectInspector) argOIs[0];        colList.add("name");        oiList.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(colList, oiList);
    }
    @Override
    public void process(Object[] arg0) throws HiveException {
        String name = ((PrimitiveObjectInspector) poi).getPrimitiveJavaObject(arg0[0]).toString();
        String[] tokens = name.split(" ");
        for (String x : tokens) {
            Object[] objects = new Object[] { x };            forward(objects);
        }
    }
    @Override
    public void close() throws HiveException {
    }
}

Caused by: java.lang.ArrayIndexOutOfBoundsException: 1    at java.util.Arrays$ArrayList.get(Arrays.java:3841)    at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serialize(LazySimpleSerDe.java:417)    at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:592)    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:796)    at org.apache.hadoop.hive.ql.exec.UDTFOperator.forwardUDTFOutput(UDTFOperator.java:125)    at org.apache.hadoop.hive.ql.udf.generic.UDTFCollector.collect(UDTFCollector.java:45)    at org.apache.hadoop.hive.ql.udf.generic.GenericUDTF.forward(GenericUDTF.java:107)    at com.suba.customHiveUdfs.MyUdtf.process(MyUdtf.java:61)    at org.apache.hadoop.hive.ql.exec.UDTFOperator.processOp(UDTFOperator.java:108)    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:796)    at org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:87)    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:796)    at org.apache.hadoop.hive.ql.exec.TableScanOperator.processOp(TableScanOperator.java:92)    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:796)    at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:539)
    ... 9 more


The error message shown below: Getting array index out of bound error.

Caused by: java.lang.ArrayIndexOutOfBoundsException: 1    at java.util.Arrays$ArrayList.get(Arrays.java:3841)    at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serialize(LazySimpleSerDe.java:417)    at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:592)    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:796)    at org.apache.hadoop.hive.ql.exec.UDTFOperator.forwardUDTFOutput(UDTFOperator.java:125)    at org.apache.hadoop.hive.ql.udf.generic.UDTFCollector.collect(UDTFCollector.java:45)    at org.apache.hadoop.hive.ql.udf.generic.GenericUDTF.forward(GenericUDTF.java:107)    at com.suba.customHiveUdfs.MyUdtf.process(MyUdtf.java:61)    at org.apache.hadoop.hive.ql.exec.UDTFOperator.processOp(UDTFOperator.java:108)    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:796)    at org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:87)    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:796)    at org.apache.hadoop.hive.ql.exec.TableScanOperator.processOp(TableScanOperator.java:92)    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:796)    at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:539)
    ... 9 more
Don't have an account?
Coming from Hortonworks? Activate your account here