
Getting java.lang.ClassCastException: java.lang.Double cannot be cast to java.lang.Float when we try to retrieve data from a Hive table using the Spark Hive Warehouse Connector on HDP 3

New Contributor

Hive table:

Table name: sample_tbl

+------------+------------+----------+
| col_name   | data_type  | comment  |
+------------+------------+----------+
| float_col  | float      |          |
+------------+------------+----------+

Data in sample_tbl:

+-----------------------+
| sample_tbl.float_col  |
+-----------------------+
| 100.12                |
+-----------------------+
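For reference, the original DDL is not part of the post; a minimal, hypothetical setup that reproduces this schema and row could look like the following, assuming the ao_test database already exists and using the HWC session (hive) built in the sample code below.

// Hypothetical setup only: the actual DDL used for sample_tbl is not shown in the post.
// Assumes the ao_test database exists and `hive` is the HiveWarehouseSession built below.
hive.executeUpdate("CREATE TABLE IF NOT EXISTS ao_test.sample_tbl (float_col FLOAT)")
hive.executeUpdate("INSERT INTO ao_test.sample_tbl VALUES (100.12)")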

Sample code:

import com.hortonworks.hwc.HiveWarehouseSession
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

// Spark configuration for the Hive Warehouse Connector (HiveServer2 JDBC URL, metastore, LLAP hosts)
val conf = new SparkConf()
  .set("spark.submit.deployMode", "cluster")
  .set("spark.hadoop.job.ugi", "spark")
  .set("spark.sql.hive.hiveserver2.jdbc.url", "jdbc:hive2://xxx1:2181,xxx3:2181,xxx2:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2;principal=hive/_HOST@KERB.DOMAIN.COM")
  .set("spark.datasource.hive.warehouse.metastoreUri", "thrift://xxx2:9083")
  .set("spark.hadoop.hive.llap.daemon.service.hosts", "@llap0")
  .set("spark.hadoop.hive.zookeeper.quorum", "xxx1:2181,xxx3:2181,xxx2:2181")
  .set("spark.sql.hive.hiveserver2.jdbc.url.principal", "hive/_HOST@KERB.DOMAIN.COM")
  .setMaster("yarn")
  .setAppName("test app")

val sparkSession = SparkSession.builder().config(conf).appName("Zeppelin test").enableHiveSupport().getOrCreate()

// Build the Hive Warehouse Connector session and read the table
val hive = HiveWarehouseSession.session(sparkSession).build()
val resultDataSet = hive.execute("select * from ao_test.sample_tbl")
resultDataSet.show()
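Two workarounds that are sometimes suggested for this kind of float/double mismatch with HWC (neither confirmed in this thread, so treat them as a sketch): cast the FLOAT column to DOUBLE on the Hive side, or read through executeQuery() (the LLAP path) instead of execute() (the HiveServer2 JDBC path).

// Sketch of possible workarounds, assuming the session objects defined above.
// 1) Cast float_col to double in the query so Spark never has to unbox a Float.
val castedDf = hive.execute("select cast(float_col as double) as float_col from ao_test.sample_tbl")
castedDf.show()

// 2) executeQuery() reads via LLAP instead of the JDBC path used by execute(),
//    which may avoid the Double-to-Float boxing mismatch in some HWC versions.
val llapDf = hive.executeQuery("select * from ao_test.sample_tbl")
llapDf.show()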


Exception log:


java.lang.ClassCastException: java.lang.Double cannot be cast to java.lang.Float
    at scala.runtime.BoxesRunTime.unboxToFloat(BoxesRunTime.java:109)
    at org.apache.spark.sql.catalyst.expressions.BaseGenericInternalRow$class.getFloat(rows.scala:43)
    at org.apache.spark.sql.catalyst.expressions.GenericInternalRow.getFloat(rows.scala:195)
    at org.apache.spark.sql.catalyst.expressions.BoundReference.eval(BoundAttribute.scala:47)
    at org.apache.spark.sql.catalyst.expressions.UnaryExpression.eval(Expression.scala:359)
    at org.apache.spark.sql.catalyst.expressions.Alias.eval(namedExpressions.scala:139)
    at org.apache.spark.sql.catalyst.expressions.InterpretedProjection.apply(Projection.scala:48)
    at org.apache.spark.sql.catalyst.expressions.InterpretedProjection.apply(Projection.scala:30)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
    at scala.collection.AbstractTraversable.map(Traversable.scala:104)
    at org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation$$anonfun$apply$23.applyOrElse(Optimizer.scala:1193)
    at org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation$$anonfun$apply$23.applyOrElse(Optimizer.scala:1188)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:267)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:267)
    at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:266)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306)
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:272)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:256)
    at org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation$.apply(Optimizer.scala:1188)
    at org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation$.apply(Optimizer.scala:1187)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:87)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:84)
    at scala.collection.IndexedSeqOptimized$class.foldl(IndexedSeqOptimized.scala:57)
    at scala.collection.IndexedSeqOptimized$class.foldLeft(IndexedSeqOptimized.scala:66)
    at scala.collection.mutable.WrappedArray.foldLeft(WrappedArray.scala:35)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:84)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:76)
    at scala.collection.immutable.List.foreach(List.scala:381)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:76)
    at org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:66)
    at org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:66)
    at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:72)
    at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:68)
    at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:77)
    at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:77)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
    at org.apache.spark.sql.Dataset.rdd$lzycompute(Dataset.scala:2975)
    at org.apache.spark.sql.Dataset.rdd(Dataset.scala:2973)
    at org.apache.spark.sql.Dataset.toJavaRDD(Dataset.scala:2985)
    at org.apache.spark.sql.Dataset.javaRDD(Dataset.scala:2992)
    at com.apporchid.cloudseer.datasink.db.RelationalDBDatasink.write(RelationalDBDatasink.java:475)
    at com.apporchid.cloudseer.common.datasink.BaseDatasink.write(BaseDatasink.java:141)
    at com.apporchid.cloudseer.common.pipeline.task.BaseDatasinkTask.writeData(BaseDatasinkTask.java:53)
    at com.apporchid.cloudseer.pipeline.runner.SparkPipelineRunner.executeTask(SparkPipelineRunner.java:95)
    at com.apporchid.cloudseer.pipeline.runner.SparkPipelineRunner.executeTasks(SparkPipelineRunner.java:45)
    at com.apporchid.cloudseer.pipeline.runner.SparkPipelineRunner.executeTasks(SparkPipelineRunner.java:55)
    at com.apporchid.cloudseer.pipeline.runner.SparkPipelineRunner$SparkPipelineEventObserver.onNext(SparkPipelineRunner.java:114)
    at com.apporchid.cloudseer.pipeline.runner.SparkPipelineRunner$SparkPipelineEventObserver.onNext(SparkPipelineRunner.java:102)
    at io.reactivex.subjects.PublishSubject$PublishDisposable.onNext(PublishSubject.java:264)
    at io.reactivex.subjects.PublishSubject.onNext(PublishSubject.java:183)
    at com.apporchid.cloudseer.common.pipeline.event.PipelineEventPublisher.publishEvent(PipelineEventPublisher.java:34)
    at com.apporchid.cloudseer.common.pipeline.event.PipelineEventPublisher.publishEvent(PipelineEventPublisher.java:26)
    at com.apporchid.cloudseer.common.pipeline.event.PipelineEventPublisher.publishEvent(PipelineEventPublisher.java:21)
    at com.apporchid.cloudseer.common.pipeline.runner.BasePipelineRunner.beginExecution(BasePipelineRunner.java:176)
    at com.apporchid.cloudseer.common.pipeline.runner.BasePipelineRunner.execute(BasePipelineRunner.java:140)
    at com.apporchid.cloudseer.common.pipeline.runner.BasePipelineRunner.run(BasePipelineRunner.java:96)
    at com.apporchid.cloudseer.pipeline.Pipeline.run(Pipeline.java:269)
    at com.apporchid.cloudseer.pipeline.Pipeline.run(Pipeline.java:263)
    at com.apporchid.cloudseer.sparksubmit.api.yarnclient.SparkClusterPipelineStarter.main(SparkClusterPipelineStarter.java:22)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$4.run(ApplicationMaster.scala:721)
1 REPLY

Contributor

Try disabling vectorization for this job alone; I remember this being a bug in Hive 1.2.1. As a workaround, run set hive.vectorized.execution.enabled=false; before the query.
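A minimal sketch of how that setting might be applied from the Spark/HWC side, not verified on HDP 3, using the session objects from the question above; exactly where the setting has to land (the HWC session or the HiveServer2 JDBC URL) is an assumption here.

// Option 1: issue the setting through the same HWC session before the read.
// Whether a session-level set sticks for the next statement depends on how
// HWC manages its HiveServer2 connection, so this is illustrative only.
hive.execute("set hive.vectorized.execution.enabled=false")
val result = hive.execute("select * from ao_test.sample_tbl")
result.show()

// Option 2: append the setting to the hive-conf section of the HiveServer2
// JDBC URL (the part after '?') when building the SparkConf, so sessions
// created from this URL start with vectorization disabled.
val jdbcUrl = "jdbc:hive2://xxx1:2181,xxx3:2181,xxx2:2181/;" +
  "serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2;" +
  "principal=hive/_HOST@KERB.DOMAIN.COM" +
  "?hive.vectorized.execution.enabled=false"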