Created 06-11-2019 11:45 AM
The getWritableSize(ObjectInspector oi, Object value) method in the org.apache.hadoop.hive.ql.stats.StatsUtils class fails with a NullPointerException (NPE) when its second parameter (Object value) is null.
Apache hive bug filed: https://issues.apache.org/jira/browse/HIVE-21853
The issue was originally found when running a UDF query against a Hortonworks cluster with HDP 3.1 running Hive 3.1.0. It occurs when executing the UDF against a cluster that uses the Tez execution engine.
Environment
Hortonworks
|
sample UDF
import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.Text; import java.util.ArrayList; import java.util.List; public class Hive21853UDF extends GenericUDF { @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { ArrayList<String> structFieldNames = new ArrayList<>(); ArrayList<ObjectInspector> structFieldObjectInspectors = new ArrayList<>(); structFieldNames.add("val1"); structFieldObjectInspectors.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); structFieldNames.add("val2"); structFieldObjectInspectors.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); StructObjectInspector si = ObjectInspectorFactory.getStandardStructObjectInspector(structFieldNames, structFieldObjectInspectors); return si; } @Override public List<Object> evaluate(DeferredObject[] arguments) throws HiveException { List<Object> result = new ArrayList<>(); result.add(new Text("value1")); result.add(null); return result; } @Override public String getDisplayString(String[] children) { return "My test udf"; } }
Beeline configuration
set hive.execution.engine=tez; set hive.fetch.task.conversion=none;
Steps to reproduce
On a Hortonworks cluster with HDP 3.1 deployed:
1. Start a Beeline Hive session.
2. Set the Hive configurations listed above.
3. Add the jar containing the UDF from the sample code.
4. Create a table containing one string column and insert a row:
   create table tmptable(col1 string);
   insert into table tmptable values ('somestring');
5. Register the function:
   create function hiveUdf as 'Hive21853UDF';
6. Run the query:
   select hiveUdf() from tmptable;
This will result in a NullPointerException similar to this:
ql.Driver ()) - FAILED: NullPointerException null
java.lang.NullPointerException
    at org.apache.hadoop.hive.ql.stats.StatsUtils.getWritableSize(StatsUtils.java:1373)
    at org.apache.hadoop.hive.ql.stats.StatsUtils.getSizeOfStruct(StatsUtils.java:1356)
    at org.apache.hadoop.hive.ql.stats.StatsUtils.getSizeOfComplexTypes(StatsUtils.java:1212)
    at org.apache.hadoop.hive.ql.stats.StatsUtils.getAvgColLenOf(StatsUtils.java:1140)
    at org.apache.hadoop.hive.ql.stats.StatsUtils.getColStatisticsFromExpression(StatsUtils.java:1584)
    at org.apache.hadoop.hive.ql.stats.StatsUtils.getColStatisticsFromExprMap(StatsUtils.java:1424)
    at org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory$SelectStatsRule.process(StatsRulesProcFactory.java:196)
    at org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90)
    at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105)
    at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89)
    at org.apache.hadoop.hive.ql.lib.LevelOrderWalker.startWalking(LevelOrderWalker.java:122)
    at org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics.transform(AnnotateWithStatistics.java:78)
    at org.apache.hadoop.hive.ql.parse.TezCompiler.runStatsAnnotation(TezCompiler.java:397)
    at org.apache.hadoop.hive.ql.parse.TezCompiler.optimizeOperatorPlan(TezCompiler.java:161)
    at org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:148)
    at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12443)
    at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:358)
    at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:285)
    at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:664)
    at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1863)
    at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1810)
    at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1805)
    at org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:126)
    at org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:197)
    at org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:262)
    at org.apache.hive.service.cli.operation.Operation.run(Operation.java:247)
    at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:541)
    at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:527)
    at org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:315)
    at org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:562)
    at org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1557)
    at org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1542)
    at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
    at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
    at org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56)
    at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)