Hi Folks,
I am having issues connecting to the HBase instance running on CDH 5.13 from my Scala code. The build.sbt and the code are given below; I have tried to follow the steps in
build.sbt
version := "0.0.1-SNAPSHOT"
name := "entityextractor"
organization := "in.sample"
val scalaTestVersion = "3.0.1"
val sparkVersion = "2.1.0"
val hadoopVersion = "2.7.1"
val hbaseVersion = "1.2.2"
val droolsVersion = "6.2.0.Final"
// coverageEnabled := true
// scalastyle:off
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-sql" % sparkVersion exclude("javax.servlet", "servlet-api"),
  "org.apache.spark" %% "spark-streaming" % sparkVersion exclude("javax.servlet", "servlet-api"),
  "org.apache.spark" %% "spark-streaming-kafka-0-8" % sparkVersion exclude("org.apache.spark", "spark-streaming_2.10"),
  "org.apache.spark" % "spark-hive_2.11" % "2.1.0" % "provided",
  "org.apache.spark" %% "spark-mllib" % "2.1.0"
)
libraryDependencies ++= Seq(
  "org.apache.hbase" % "hbase-server" % hbaseVersion exclude("org.mortbay.jetty", "servlet-api-2.5"),
  "org.apache.hbase" % "hbase-common" % hbaseVersion exclude("javax.servlet", "servlet-api"),
  "org.apache.hbase" % "hbase-client" % hbaseVersion exclude("javax.servlet", "servlet-api"),
  "org.apache.hadoop" % "hadoop-common" % hadoopVersion exclude("javax.servlet", "servlet-api"),
  "org.apache.hadoop" % "hadoop-hdfs" % hadoopVersion exclude("javax.servlet", "servlet-api")
)
libraryDependencies ++= Seq(
  "org.apache.hbase" % "hbase-testing-util" % hbaseVersion % Test,
  "org.apache.hbase" % "hbase-server" % hbaseVersion % Test classifier "tests" exclude("javax.servlet", "servlet-api"),
  "org.apache.hbase" % "hbase-common" % hbaseVersion % Test classifier "tests" exclude("javax.servlet", "servlet-api"),
  "org.apache.hbase" % "hbase-hadoop-compat" % hbaseVersion classifier "tests" exclude("javax.servlet", "servlet-api"),
  "org.apache.hbase" % "hbase-hadoop2-compat" % hbaseVersion classifier "tests" exclude("javax.servlet", "servlet-api"),
  "org.apache.hadoop" % "hadoop-common" % hadoopVersion % Test classifier "tests" exclude("javax.servlet", "servlet-api"),
  "org.apache.hadoop" % "hadoop-hdfs" % hadoopVersion % Test classifier "tests" exclude("javax.servlet", "servlet-api")
)
libraryDependencies ++= Seq(
  "org.drools" % "drools-core" % droolsVersion,
  "org.drools" % "drools-compiler" % droolsVersion
)
libraryDependencies += "org.scala-lang" % "scala-parser-combinators" % "2.11.0-M4"
libraryDependencies += "com.google.guava" % "guava" % "14.0" force()
libraryDependencies += "com.typesafe" % "config" % "1.2.0"
libraryDependencies += "org.apache.spark" %% "spark-mllib" % "2.1.0"
// scalastyle:on
libraryDependencies += "com.rockymadden.stringmetric" %% "stringmetric-core" % "0.27.4"
// https://mvnrepository.com/artifact/org.apache.phoenix/phoenix-spark
libraryDependencies += "org.apache.phoenix" % "phoenix-spark" % "4.11.0-HBase-1.2"
parallelExecution in test := false
test in assembly := {}
HbaseConnectionUtil.scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory}
import org.apache.log4j.Logger

object HbaseConnectionUtil {
  /* NOTE :
   * Make sure that hbase-site.xml is sourced instead of manually calling conf.set()
   * for hbase.zookeeper.quorum, etc.
   */
  private val logger: Logger = Logger.getLogger(HbaseConnectionUtil.getClass)

  // Assumed here: hbaseConfDir points at the client configuration directory
  // (e.g. /etc/hbase/conf on a CDH gateway node).
  private val hbaseConfDir: String = sys.env.getOrElse("HBASE_CONF_DIR", "/etc/hbase/conf")

  val conf: Configuration = HBaseConfiguration.create()
  conf.addResource(new Path(hbaseConfDir + "/hbase-site.xml"))
  conf.addResource(new Path(hbaseConfDir + "/core-site.xml"))
  conf.set("hbase.client.retries.number", "3")

  // The connection is created once, when the object is first referenced.
  val connection: Connection = ConnectionFactory.createConnection(this.conf)

  def getConnection(): Connection = this.connection

  def getConf(): Configuration = this.conf
}
I am using this object in my code to get the connection as follows:
val admin = HbaseConnectionUtil.getConnection().getAdmin
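For a bit more context, the driver main is roughly the following (a simplified sketch: Spark setup is omitted, the table name and the tableExists call are purely illustrative, and the package/object names are taken from the stack trace below; the failure is thrown as soon as HbaseConnectionUtil is first referenced):

package in.thedatateam.sample

import org.apache.hadoop.hbase.TableName
import in.thedatateam.sample.util.HbaseConnectionUtil

object sample {
  def main(args: Array[String]): Unit = {
    // Sanity check that hbase-site.xml was actually sourced (see the note in the util):
    // this should print quickstart.cloudera in my setup, not the "localhost" default.
    println("hbase.zookeeper.quorum = " + HbaseConnectionUtil.getConf().get("hbase.zookeeper.quorum"))

    // First reference to the util object: the HBase connection is created here,
    // and this is where the exception below surfaces.
    val admin = HbaseConnectionUtil.getConnection().getAdmin

    // Illustrative admin call; "sample_table" is only a placeholder name.
    println("sample_table exists: " + admin.tableExists(TableName.valueOf("sample_table")))

    admin.close()
  }
}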
I am getting the following error:
17/12/05 23:26:06 INFO zookeeper.ZooKeeper: Client environment:java.library.path=:/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/lib/hadoop/lib/native:/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/lib/hadoop/lib/native:/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib
17/12/05 23:26:06 INFO zookeeper.ZooKeeper: Client environment:java.io.tmpdir=/tmp
17/12/05 23:26:06 INFO zookeeper.ZooKeeper: Client environment:java.compiler=<NA>
17/12/05 23:26:06 INFO zookeeper.ZooKeeper: Client environment:os.name=Linux
17/12/05 23:26:06 INFO zookeeper.ZooKeeper: Client environment:os.arch=amd64
17/12/05 23:26:06 INFO zookeeper.ZooKeeper: Client environment:os.version=2.6.32-573.el6.x86_64
17/12/05 23:26:06 INFO zookeeper.ZooKeeper: Client environment:user.name=cloudera
17/12/05 23:26:06 INFO zookeeper.ZooKeeper: Client environment:user.home=/home/cloudera
17/12/05 23:26:06 INFO zookeeper.ZooKeeper: Client environment:user.dir=/home/cloudera
17/12/05 23:26:06 INFO zookeeper.ZooKeeper: Initiating client connection, connectString=quickstart.cloudera:2181 sessionTimeout=60000 watcher=hconnection-0x391d28ea0x0, quorum=quickstart.cloudera:2181, baseZNode=/hbase
Exception in thread "main" java.io.IOException: java.lang.reflect.InvocationTargetException
at org.apache.hadoop.hbase.client.ConnectionFactory.createConnection(ConnectionFactory.java:240)
at org.apache.hadoop.hbase.client.ConnectionFactory.createConnection(ConnectionFactory.java:218)
at org.apache.hadoop.hbase.client.ConnectionFactory.createConnection(ConnectionFactory.java:119)
at in.thedatateam.sample.util.HbaseConnectionUtil$.createConnection(HbaseConnectionUtil.scala:24)
at in.thedatateam.sample.sample$.main(sample.scala:25)
at in.thedatateam.sample.sample.main(sample.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:755)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:119)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.hbase.client.ConnectionFactory.createConnection(ConnectionFactory.java:238)
... 14 more
Caused by: java.lang.NoClassDefFoundError: org/apache/htrace/Trace
at org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper.exists(RecoverableZooKeeper.java:217)
at org.apache.hadoop.hbase.zookeeper.ZKUtil.checkExists(ZKUtil.java:419)
at org.apache.hadoop.hbase.zookeeper.ZKClusterId.readClusterIdZNode(ZKClusterId.java:65)
at org.apache.hadoop.hbase.client.ZooKeeperRegistry.getClusterId(ZooKeeperRegistry.java:105)
at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.retrieveClusterId(ConnectionManager.java:905)
at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.<init>(ConnectionManager.java:648)
... 19 more
Caused by: java.lang.ClassNotFoundException: org.apache.htrace.Trace
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 25 more
17/12/05 23:26:07 INFO zookeeper.ClientCnxn: Opening socket connection to server quickstart.cloudera/10.0.2.15:2181. Will not attempt to authenticate using SASL (unknown error)
17/12/05 23:26:07 INFO zookeeper.ClientCnxn: Socket connection established, initiating session, client: /10.0.2.15:53759, server: quickstart.cloudera/10.0.2.15:2181
17/12/05 23:26:07 INFO spark.SparkContext: Invoking stop() from shutdown hook
17/12/05 23:26:07 ERROR scheduler.LiveListenerBus: Listener EventLoggingListener threw an exception
java.io.IOException: All datanodes DatanodeInfoWithStorage[10.0.2.15:50010,DS-6bc44b86-0da7-406b-b1ca-5d0085dd1f0d,DISK] are bad. Aborting...
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.setupPipelineForAppendOrRecovery(DFSOutputStream.java:1466)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.processDatanodeError(DFSOutputStream.java:1237)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:722)
17/12/05 23:26:07 INFO zookeeper.ClientCnxn: Session establishment complete on server quickstart.cloudera/10.0.2.15:2181, sessionid = 0x1602a25d2c40279, negotiated timeout = 60000
17/12/05 23:26:07 INFO server.AbstractConnector: Stopped Spark@2d8f2f3a{HTTP/1.1,[http/1.1]}{0.0.0.0:4040}
17/12/05 23:26:07 INFO ui.SparkUI: Stopped Spark web UI at http://10.0.2.15:4040
17/12/05 23:26:07 ERROR util.Utils: Uncaught exception in thread Thread-2
java.lang.IllegalArgumentException: Self-suppression not permitted
at java.lang.Throwable.addSuppressed(Throwable.java:1043)
at java.io.FilterOutputStream.close(FilterOutputStream.java:159)
at sun.nio.cs.StreamEncoder.implClose(StreamEncoder.java:320)
at sun.nio.cs.StreamEncoder.close(StreamEncoder.java:149)
at java.io.OutputStreamWriter.close(OutputStreamWriter.java:233)
at java.io.BufferedWriter.close(BufferedWriter.java:266)
at java.io.PrintWriter.close(PrintWriter.java:339)
at org.apache.spark.scheduler.EventLoggingListener$$anonfun$stop$1.apply(EventLoggingListener.scala:230)
at org.apache.spark.scheduler.EventLoggingListener$$anonfun$stop$1.apply(EventLoggingListener.scala:230)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.EventLoggingListener.stop(EventLoggingListener.scala:230)
at org.apache.spark.SparkContext$$anonfun$stop$7$$anonfun$apply$mcV$sp$5.apply(SparkContext.scala:1917)
at org.apache.spark.SparkContext$$anonfun$stop$7$$anonfun$apply$mcV$sp$5.apply(SparkContext.scala:1917)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.SparkContext$$anonfun$stop$7.apply$mcV$sp(SparkContext.scala:1917)
at org.apache.spark.util.Utils$.tryLogNonFatalError(Utils.scala:1317)
at org.apache.spark.SparkContext.stop(SparkContext.scala:1916)
at org.apache.spark.SparkContext$$anonfun$2.apply$mcV$sp(SparkContext.scala:581)
at org.apache.spark.util.SparkShutdownHook.run(ShutdownHookManager.scala:216)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ShutdownHookManager.scala:188)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply(ShutdownHookManager.scala:188)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply(ShutdownHookManager.scala:188)
at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1954)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply$mcV$sp(ShutdownHookManager.scala:188)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply(ShutdownHookManager.scala:188)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply(ShutdownHookManager.scala:188)
at scala.util.Try$.apply(Try.scala:192)
at org.apache.spark.util.SparkShutdownHookManager.runAll(ShutdownHookManager.scala:188)
at org.apache.spark.util.SparkShutdownHookManager$$anon$2.run(ShutdownHookManager.scala:178)
at org.apache.hadoop.util.ShutdownHookManager$1.run(ShutdownHookManager.java:54)
Caused by: java.io.IOException: All datanodes DatanodeInfoWithStorage[10.0.2.15:50010,DS-6bc44b86-0da7-406b-b1ca-5d0085dd1f0d,DISK] are bad. Aborting...
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.setupPipelineForAppendOrRecovery(DFSOutputStream.java:1466)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.processDatanodeError(DFSOutputStream.java:1237)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:722)
17/12/05 23:26:07 INFO spark.MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
17/12/05 23:26:07 INFO memory.MemoryStore: MemoryStore cleared
17/12/05 23:26:07 INFO storage.BlockManager: BlockManager stopped
17/12/05 23:26:07 INFO storage.BlockManagerMaster: BlockManagerMaster stopped
17/12/05 23:26:07 INFO scheduler.OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
17/12/05 23:26:07 INFO spark.SparkContext: Successfully stopped SparkContext
17/12/05 23:26:07 INFO util.ShutdownHookManager: Shutdown hook called
17/12/05 23:26:07 INFO util.ShutdownHookManager: Deleting directory /tmp/spark-5c1430f1-b970-4f23-8cad-1daeb1d93686