Created 12-10-2020 01:43 AM
Hi,
I'm submitting a spark program in cluster mode in two clusters. The job process large data sets
First cluster runs HDP 3.1 and using HiveWarehosueConnector to submit the spark script while the second cluster is HDP 2.6.
The program throws OutOFMemory exception in HDP3 while it manages to succeed in HDP2.6.
The spark configuration i.e. driver memory, executors etc.. are identical and the cluster configuration are similar as well as far as I know.
is this case related to HWC or should I look at something else?
spark configurations (for both clusters)
spark_executors=96
executor_memory=24883M
driver_memory=24883M
snippet of the logs:
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:119)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:371)
at org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:150)
at org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:150)
at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:605)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.prepareShuffleDependency(ShuffleExchangeExec.scala:92)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$$anonfun$doExecute$1.apply(ShuffleExchangeExec.scala:128)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$$anonfun$doExecute$1.apply(ShuffleExchangeExec.scala:119)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 38 more
Caused by: java.lang.RuntimeException: java.lang.RuntimeException: java.io.IOException: java.sql.SQLException: Error retrieving next row
at com.hortonworks.spark.sql.hive.llap.HiveWarehouseDataSourceReader.createBatchDataReaderFactories(HiveWarehouseDataSourceReader.java:150)
at org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanExec.inputRDD$lzycompute(DataSourceV2ScanExec.scala:64)
at org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanExec.inputRDD(DataSourceV2ScanExec.scala:60)
at org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanExec.inputRDDs(DataSourceV2ScanExec.scala:79)
at org.apache.spark.sql.execution.FilterExec.inputRDDs(basicPhysicalOperators.scala:121)
at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:605)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
at org.apache.spark.sql.execution.UnionExec$$anonfun$doExecute$1.apply(basicPhysicalOperators.scala:557)
at org.apache.spark.sql.execution.UnionExec$$anonfun$doExecute$1.apply(basicPhysicalOperators.scala:557)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.immutable.List.foreach(List.scala:392)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.immutable.List.map(List.scala:296)
at org.apache.spark.sql.execution.UnionExec.doExecute(basicPhysicalOperators.scala:557)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:371)
at org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:150)
at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:605)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.prepareShuffleDependency(ShuffleExchangeExec.scala:92)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$$anonfun$doExecute$1.apply(ShuffleExchangeExec.scala:128)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$$anonfun$doExecute$1.apply(ShuffleExchangeExec.scala:119)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 59 more
Caused by: java.lang.RuntimeException: java.io.IOException: java.sql.SQLException: Error retrieving next row
at com.hortonworks.spark.sql.hive.llap.HiveWarehouseDataSourceReader.getSplitsFactories(HiveWarehouseDataSourceReader.java:168)
at com.hortonworks.spark.sql.hive.llap.HiveWarehouseDataSourceReader.createBatchDataReaderFactories(HiveWarehouseDataSourceReader.java:146)
... 97 more
Caused by: java.io.IOException: java.sql.SQLException: Error retrieving next row
at org.apache.hadoop.hive.llap.LlapBaseInputFormat.getSplits(LlapBaseInputFormat.java:320)
at com.hortonworks.spark.sql.hive.llap.HiveWarehouseDataSourceReader.getSplitsFactories(HiveWarehouseDataSourceReader.java:161)
... 98 more
Caused by: java.sql.SQLException: Error retrieving next row
at shadehive.org.apache.hive.jdbc.HiveQueryResultSet.next(HiveQueryResultSet.java:397)
at org.apache.hadoop.hive.llap.LlapBaseInputFormat.getSplits(LlapBaseInputFormat.java:302)
... 99 more
Caused by: org.apache.thrift.TException: Error in calling method FetchResults
at shadehive.org.apache.hive.jdbc.HiveConnection$SynchronizedHandler.invoke(HiveConnection.java:1719)
at com.sun.proxy.$Proxy24.FetchResults(Unknown Source)
at shadehive.org.apache.hive.jdbc.HiveQueryResultSet.next(HiveQueryResultSet.java:378)
... 100 more
Caused by: java.lang.OutOfMemoryError
at java.io.ByteArrayOutputStream.hugeCapacity(ByteArrayOutputStream.java:123)
at java.io.ByteArrayOutputStream.grow(ByteArrayOutputStream.java:117)
at java.io.ByteArrayOutputStream.ensureCapacity(ByteArrayOutputStream.java:93)
at java.io.ByteArrayOutputStream.write(ByteArrayOutputStream.java:153)
at org.apache.thrift.transport.THttpClient.flushUsingHttpClient(THttpClient.java:279)
at org.apache.thrift.transport.THttpClient.flush(THttpClient.java:316)
at org.apache.thrift.TServiceClient.sendBase(TServiceClient.java:73)
at org.apache.thrift.TServiceClient.sendBase(TServiceClient.java:62)
at shadehive.org.apache.hive.service.rpc.thrift.TCLIService$Client.send_FetchResults(TCLIService.java:561)
at shadehive.org.apache.hive.service.rpc.thrift.TCLIService$Client.FetchResults(TCLIService.java:553)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at shadehive.org.apache.hive.jdbc.HiveConnection$SynchronizedHandler.invoke(HiveConnection.java:1711)
... 102 more