Posted 06-11-2019 11:45 AM
The getWritableSize(ObjectInspector oi, Object value) method in the org.apache.hadoop.hive.ql.stats.StatsUtils class fails with a NullPointerException when the second parameter (Object value) is null. Apache Hive bug filed: https://issues.apache.org/jira/browse/HIVE-21853

The issue was originally found when running a UDF query against a Hortonworks cluster with HDP 3.1 running Hive 3.1.0. It occurs when executing the UDF against a cluster that uses the Tez execution engine (a sketch of the missing null check appears after the stack trace below).

Environment
- Hortonworks Ambari version: 2.7.3.0
- HDP stack version: 3.1
- HDP stack repo version: 3.1.0.0
- Stack VDF version: 3.1.0.0-78

Sample UDF
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;
import java.util.ArrayList;
import java.util.List;
public class Hive21853UDF extends GenericUDF {

    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        // Declare the return type: a struct of two writable string fields.
        ArrayList<String> structFieldNames = new ArrayList<>();
        ArrayList<ObjectInspector> structFieldObjectInspectors = new ArrayList<>();
        structFieldNames.add("val1");
        structFieldObjectInspectors.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
        structFieldNames.add("val2");
        structFieldObjectInspectors.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
        StructObjectInspector si = ObjectInspectorFactory.getStandardStructObjectInspector(structFieldNames,
            structFieldObjectInspectors);
        return si;
    }

    @Override
    public List<Object> evaluate(DeferredObject[] arguments) throws HiveException {
        // Always return a two-field struct whose second field is deliberately null;
        // this null value is what reproduces the NPE described above.
        List<Object> result = new ArrayList<>();
        result.add(new Text("value1"));
        result.add(null);
        return result;
    }

    @Override
    public String getDisplayString(String[] children) {
        return "My test udf";
    }
}
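As a quick sanity check outside Hive (a hedged sketch, assuming hive-exec is on the classpath; the test class name below is just illustrative), evaluate() can be called directly. It ignores its arguments and always returns a two-element row whose second entry is null:

import java.util.List;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;

public class Hive21853UDFSmokeTest {
    public static void main(String[] args) throws HiveException {
        Hive21853UDF udf = new Hive21853UDF();
        // evaluate() ignores its arguments in this sample, so an empty array is enough.
        List<Object> row = udf.evaluate(new DeferredObject[0]);
        System.out.println(row); // prints [value1, null]
    }
}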
Beeline configuration
set hive.execution.engine=tez;
set hive.fetch.task.conversion=none;

Steps to reproduce on a Hortonworks cluster with HDP 3.1 deployed
- Start a Beeline Hive session.
- Set the above Hive configurations.
- Add the jar containing the UDF from the sample code.
- Create a table containing one string column and insert a row:
  create table tmptable(col1 string);
  insert into table tmptable values ('somestring');
- Create the function: create function hiveUdf as 'Hive21853UDF';
- Run the UDF: select hiveUdf() from tmptable;

This will result in a NullPointerException similar to the following:
ql.Driver - FAILED: NullPointerException null
java.lang.NullPointerException
at org.apache.hadoop.hive.ql.stats.StatsUtils.getWritableSize(StatsUtils.java:1373)
at org.apache.hadoop.hive.ql.stats.StatsUtils.getSizeOfStruct(StatsUtils.java:1356)
at org.apache.hadoop.hive.ql.stats.StatsUtils.getSizeOfComplexTypes(StatsUtils.java:1212)
at org.apache.hadoop.hive.ql.stats.StatsUtils.getAvgColLenOf(StatsUtils.java:1140)
at org.apache.hadoop.hive.ql.stats.StatsUtils.getColStatisticsFromExpression(StatsUtils.java:1584)
at org.apache.hadoop.hive.ql.stats.StatsUtils.getColStatisticsFromExprMap(StatsUtils.java:1424)
at org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory$SelectStatsRule.process(StatsRulesProcFactory.java:196)
at org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90)
at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105)
at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89)
at org.apache.hadoop.hive.ql.lib.LevelOrderWalker.startWalking(LevelOrderWalker.java:122)
at org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics.transform(AnnotateWithStatistics.java:78)
at org.apache.hadoop.hive.ql.parse.TezCompiler.runStatsAnnotation(TezCompiler.java:397)
at org.apache.hadoop.hive.ql.parse.TezCompiler.optimizeOperatorPlan(TezCompiler.java:161)
at org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:148)
at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12443)
at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:358)
at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:285)
at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:664)
at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1863)
at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1810)
at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1805)
at org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:126)
at org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:197)
at org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:262)
at org.apache.hive.service.cli.operation.Operation.run(Operation.java:247)
at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:541)
at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:527)
at org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:315)
at org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:562)
at org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1557)
at org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1542)
at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
at org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56)
at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
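The top frame is StatsUtils.getWritableSize (StatsUtils.java:1373), the method named above. Below is a minimal sketch of the missing null check; it is an assumption about the shape of a fix, not the patch committed for HIVE-21853, and the class name and method body are placeholders:

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

public final class GetWritableSizeSketch {

    private GetWritableSizeSketch() {
    }

    // Mirrors the getWritableSize(ObjectInspector oi, Object value) signature named in
    // the report. The only behavior illustrated is the guard: a null value would
    // contribute 0 bytes to the size estimate instead of throwing.
    public static long getWritableSize(ObjectInspector oi, Object value) {
        if (value == null) {
            return 0L; // hypothetical guard; not the actual HIVE-21853 patch
        }
        // The real method continues with a per-type size computation here; it is
        // omitted because only the null check is relevant to this report.
        return 0L;
    }
}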