Created 11-20-2018 09:50 AM
Hive Table:
Table name: sample_tbl
+------------+------------+----------+
| col_name   | data_type  | comment  |
+------------+------------+----------+
| float_col  | float      |          |
+------------+------------+----------+
Data in sample_tbl:
+-----------------------+
| sample_tbl.float_col  |
+-----------------------+
| 100.12                |
+-----------------------+
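For context, a minimal sketch of how this table and row could be recreated through HWC, assuming the ao_test database from the query below and the hive session that the sample code builds (executeUpdate is the HWC method for DDL/DML statements):

// Hypothetical setup for the table shown above, issued through the
// HWC session (`hive`) built in the sample code below. The ao_test
// database name comes from the query in this post.
hive.executeUpdate("create table ao_test.sample_tbl (float_col float)")
hive.executeUpdate("insert into ao_test.sample_tbl values (100.12)")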
Sample code:
import com.hortonworks.hwc.HiveWarehouseSession
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

val conf = new SparkConf()
  .set("spark.submit.deployMode", "cluster")
  .set("spark.hadoop.job.ugi", "spark")
  .set("spark.sql.hive.hiveserver2.jdbc.url", "jdbc:hive2://xxx1:2181,xxx3:2181,xxx2:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2;principal=hive/_HOST@KERB.DOMAIN.COM")
  .set("spark.datasource.hive.warehouse.metastoreUri", "thrift://xxx2:9083")
  .set("spark.hadoop.hive.llap.daemon.service.hosts", "@llap0")
  .set("spark.hadoop.hive.zookeeper.quorum", "xxx1:2181,xxx3:2181,xxx2:2181")
  .set("spark.sql.hive.hiveserver2.jdbc.url.principal", "hive/_HOST@KERB.DOMAIN.COM")
  .setMaster("yarn")
  .setAppName("test app")

val sparkSession = SparkSession.builder()
  .config(conf)
  .appName("Zeppelin test")
  .enableHiveSupport()
  .getOrCreate()

val hive = HiveWarehouseSession.session(sparkSession).build()
val resultDataSet = hive.execute("select * from ao_test.sample_tbl")
resultDataSet.show()
Exception log:
java.lang.ClassCastException: java.lang.Double cannot be cast to java.lang.Float
    at scala.runtime.BoxesRunTime.unboxToFloat(BoxesRunTime.java:109)
    at org.apache.spark.sql.catalyst.expressions.BaseGenericInternalRow$class.getFloat(rows.scala:43)
    at org.apache.spark.sql.catalyst.expressions.GenericInternalRow.getFloat(rows.scala:195)
    at org.apache.spark.sql.catalyst.expressions.BoundReference.eval(BoundAttribute.scala:47)
    at org.apache.spark.sql.catalyst.expressions.UnaryExpression.eval(Expression.scala:359)
    at org.apache.spark.sql.catalyst.expressions.Alias.eval(namedExpressions.scala:139)
    at org.apache.spark.sql.catalyst.expressions.InterpretedProjection.apply(Projection.scala:48)
    at org.apache.spark.sql.catalyst.expressions.InterpretedProjection.apply(Projection.scala:30)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
    at scala.collection.AbstractTraversable.map(Traversable.scala:104)
    at org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation$$anonfun$apply$23.applyOrElse(Optimizer.scala:1193)
    at org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation$$anonfun$apply$23.applyOrElse(Optimizer.scala:1188)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:267)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:267)
    at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:266)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306)
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:272)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:256)
    at org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation$.apply(Optimizer.scala:1188)
    at org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation$.apply(Optimizer.scala:1187)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:87)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:84)
    at scala.collection.IndexedSeqOptimized$class.foldl(IndexedSeqOptimized.scala:57)
    at scala.collection.IndexedSeqOptimized$class.foldLeft(IndexedSeqOptimized.scala:66)
    at scala.collection.mutable.WrappedArray.foldLeft(WrappedArray.scala:35)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:84)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:76)
    at scala.collection.immutable.List.foreach(List.scala:381)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:76)
    at org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:66)
    at org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:66)
    at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:72)
    at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:68)
    at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:77)
    at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:77)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
    at org.apache.spark.sql.Dataset.rdd$lzycompute(Dataset.scala:2975)
    at org.apache.spark.sql.Dataset.rdd(Dataset.scala:2973)
    at org.apache.spark.sql.Dataset.toJavaRDD(Dataset.scala:2985)
    at org.apache.spark.sql.Dataset.javaRDD(Dataset.scala:2992)
    at com.apporchid.cloudseer.datasink.db.RelationalDBDatasink.write(RelationalDBDatasink.java:475)
    at com.apporchid.cloudseer.common.datasink.BaseDatasink.write(BaseDatasink.java:141)
    at com.apporchid.cloudseer.common.pipeline.task.BaseDatasinkTask.writeData(BaseDatasinkTask.java:53)
    at com.apporchid.cloudseer.pipeline.runner.SparkPipelineRunner.executeTask(SparkPipelineRunner.java:95)
    at com.apporchid.cloudseer.pipeline.runner.SparkPipelineRunner.executeTasks(SparkPipelineRunner.java:45)
    at com.apporchid.cloudseer.pipeline.runner.SparkPipelineRunner.executeTasks(SparkPipelineRunner.java:55)
    at com.apporchid.cloudseer.pipeline.runner.SparkPipelineRunner$SparkPipelineEventObserver.onNext(SparkPipelineRunner.java:114)
    at com.apporchid.cloudseer.pipeline.runner.SparkPipelineRunner$SparkPipelineEventObserver.onNext(SparkPipelineRunner.java:102)
    at io.reactivex.subjects.PublishSubject$PublishDisposable.onNext(PublishSubject.java:264)
    at io.reactivex.subjects.PublishSubject.onNext(PublishSubject.java:183)
    at com.apporchid.cloudseer.common.pipeline.event.PipelineEventPublisher.publishEvent(PipelineEventPublisher.java:34)
    at com.apporchid.cloudseer.common.pipeline.event.PipelineEventPublisher.publishEvent(PipelineEventPublisher.java:26)
    at com.apporchid.cloudseer.common.pipeline.event.PipelineEventPublisher.publishEvent(PipelineEventPublisher.java:21)
    at com.apporchid.cloudseer.common.pipeline.runner.BasePipelineRunner.beginExecution(BasePipelineRunner.java:176)
    at com.apporchid.cloudseer.common.pipeline.runner.BasePipelineRunner.execute(BasePipelineRunner.java:140)
    at com.apporchid.cloudseer.common.pipeline.runner.BasePipelineRunner.run(BasePipelineRunner.java:96)
    at com.apporchid.cloudseer.pipeline.Pipeline.run(Pipeline.java:269)
    at com.apporchid.cloudseer.pipeline.Pipeline.run(Pipeline.java:263)
    at com.apporchid.cloudseer.sparksubmit.api.yarnclient.SparkClusterPipelineStarter.main(SparkClusterPipelineStarter.java:22)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$4.run(ApplicationMaster.scala:721)
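The telling frame is scala.runtime.BoxesRunTime.unboxToFloat: the row handed back through HWC carries a boxed java.lang.Double, while the table schema declares the column as float, so Spark's FloatType accessor fails when it unboxes the value. As a stopgap, one sketch of a way around the mismatch (not a confirmed fix) is to cast the column to double in the query itself, so the runtime value and the reported type agree; this reuses the hive session from the sample code above:

// Workaround sketch: let Hive return the value as DOUBLE so it matches
// the boxed java.lang.Double that Spark actually receives.
val castedResult = hive.execute("select cast(float_col as double) as float_col from ao_test.sample_tbl")
castedResult.show()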
Created 11-20-2018 06:30 PM
Try disabling vectorization for this job alone; I remember this being a bug in Hive 1.2.1. As a workaround, set hive.vectorized.execution.enabled=false.
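A sketch of how that workaround might be applied through the same HWC session as the sample code above. Whether a session-level SET issued this way carries over to subsequent HWC queries depends on the HWC and HiveServer2 versions in play, so treat it as an assumption to verify:

// Unverified sketch: disable vectorized execution for this session only,
// then rerun the failing query. Reuses the `hive` session from the
// sample code in the question.
hive.executeUpdate("set hive.vectorized.execution.enabled=false")
val retried = hive.execute("select * from ao_test.sample_tbl")
retried.show()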