12-06-2017 01:17 AM
Hey Harsha, I am facing a similar problem with CDH 5.13. I have shared the details here: http://community.cloudera.com/t5/Data-Ingestion-Integration/Problem-in-connecting-to-Hbase-from-scala-code-in-Cloudera/m-p/62519#M2779 Please let me know if there is something wrong that I am doing. Thanks!
12-05-2017 11:28 PM
Hi Folks,
I am having issues connecting to the HBase instance running on CDH 5.13 from my Scala code. The build.sbt and the code are given below; I have tried to follow the steps in
https://community.cloudera.com/t5/Data-Ingestion-Integration/Problem-in-connecting-Hbase-from-Scala-code-in-Cloudera-Quick/m-p/52773
build.sbt
version := "0.0.1-SNAPSHOT"
name := "entityextractor"
organization := "in.sample"

val scalaTestVersion = "3.0.1"
val sparkVersion = "2.1.0"
val hadoopVersion = "2.7.1"
val hbaseVersion = "1.2.2"
val droolsVersion = "6.2.0.Final"

// coverageEnabled := true
// scalastyle:off
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-sql" % sparkVersion exclude("javax.servlet", "servlet-api"),
  "org.apache.spark" %% "spark-streaming" % sparkVersion exclude("javax.servlet", "servlet-api"),
  "org.apache.spark" %% "spark-streaming-kafka-0-8" % sparkVersion exclude("org.apache.spark", "spark-streaming_2.10"),
  "org.apache.spark" % "spark-hive_2.11" % "2.1.0" % "provided",
  "org.apache.spark" %% "spark-mllib" % "2.1.0"
)

libraryDependencies ++= Seq(
  "org.apache.hbase" % "hbase-server" % hbaseVersion exclude("org.mortbay.jetty", "servlet-api-2.5"),
  "org.apache.hbase" % "hbase-common" % hbaseVersion exclude("javax.servlet", "servlet-api"),
  "org.apache.hbase" % "hbase-client" % hbaseVersion exclude("javax.servlet", "servlet-api"),
  "org.apache.hadoop" % "hadoop-common" % hadoopVersion exclude("javax.servlet", "servlet-api"),
  "org.apache.hadoop" % "hadoop-hdfs" % hadoopVersion exclude("javax.servlet", "servlet-api")
)

libraryDependencies ++= Seq(
  "org.apache.hbase" % "hbase-testing-util" % hbaseVersion % Test,
  "org.apache.hbase" % "hbase-server" % hbaseVersion % Test classifier "tests" exclude("javax.servlet", "servlet-api"),
  "org.apache.hbase" % "hbase-common" % hbaseVersion % Test classifier "tests" exclude("javax.servlet", "servlet-api"),
  "org.apache.hbase" % "hbase-hadoop-compat" % hbaseVersion classifier "tests" exclude("javax.servlet", "servlet-api"),
  "org.apache.hbase" % "hbase-hadoop2-compat" % hbaseVersion classifier "tests" exclude("javax.servlet", "servlet-api"),
  "org.apache.hadoop" % "hadoop-common" % hadoopVersion % Test classifier "tests" exclude("javax.servlet", "servlet-api"),
  "org.apache.hadoop" % "hadoop-hdfs" % hadoopVersion % Test classifier "tests" exclude("javax.servlet", "servlet-api")
)

libraryDependencies ++= Seq(
  "org.drools" % "drools-core" % droolsVersion,
  "org.drools" % "drools-compiler" % droolsVersion
)

libraryDependencies += "org.scala-lang" % "scala-parser-combinators" % "2.11.0-M4"
libraryDependencies += "com.google.guava" % "guava" % "14.0" force()
libraryDependencies += "com.typesafe" % "config" % "1.2.0"
libraryDependencies += "org.apache.spark" %% "spark-mllib" % "2.1.0"
// scalastyle:on
libraryDependencies += "com.rockymadden.stringmetric" %% "stringmetric-core" % "0.27.4"
// https://mvnrepository.com/artifact/org.apache.phoenix/phoenix-spark
libraryDependencies += "org.apache.phoenix" % "phoenix-spark" % "4.11.0-HBase-1.2"

parallelExecution in test := false
test in assembly := {}
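Since the stack trace further down ends in a NoClassDefFoundError for org.apache.htrace.Trace, one thing I am considering is declaring htrace-core explicitly so it gets bundled with the rest of the dependencies. This is only a sketch; the 3.1.0-incubating version is an assumption that I still need to verify against the CDH parcel:

// Sketch only: htrace-core is the artifact that provides org.apache.htrace.Trace,
// the class reported missing at runtime. Version 3.1.0-incubating is an assumption
// for HBase 1.2.x; check the version shipped with the CDH parcel.
libraryDependencies += "org.apache.htrace" % "htrace-core" % "3.1.0-incubating"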
HbaseConnectionUtil.scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory}
import org.apache.log4j.Logger

object HbaseConnectionUtil {
  /* NOTE:
   * Make sure that hbase-site.xml is sourced instead of manually calling conf.set()
   * for hbase.zookeeper.quorum, etc.
   */
  private val logger: Logger = Logger.getLogger(HbaseConnectionUtil.getClass)

  // hbaseConfDir is defined elsewhere and points at the cluster's HBase configuration directory
  val conf: Configuration = HBaseConfiguration.create()
  conf.addResource(new Path(hbaseConfDir + "/hbase-site.xml"))
  conf.addResource(new Path(hbaseConfDir + "/core-site.xml"))
  conf.set("hbase.client.retries.number", "3")

  val connection: Connection = ConnectionFactory.createConnection(this.conf)

  def getConnection(): Connection = this.connection

  def getConf(): Configuration = this.conf
}
I use this object in my code to get the connection in the following manner:
val admin = HbaseConnectionUtil.getConnection().getAdmin
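A fuller sketch of how the admin handle gets used (the "my_table" table name is only a placeholder for illustration, not from my actual code):

import org.apache.hadoop.hbase.TableName

// Sketch only: check for a placeholder table and release the admin handle afterwards.
val admin = HbaseConnectionUtil.getConnection().getAdmin
try {
  val exists = admin.tableExists(TableName.valueOf("my_table"))
  println(s"my_table exists: $exists")
} finally {
  admin.close()
}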
I get the following error:
17/12/05 23:26:06 INFO zookeeper.ZooKeeper: Client environment:java.library.path=:/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/lib/hadoop/lib/native:/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/lib/hadoop/lib/native:/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib
17/12/05 23:26:06 INFO zookeeper.ZooKeeper: Client environment:java.io.tmpdir=/tmp
17/12/05 23:26:06 INFO zookeeper.ZooKeeper: Client environment:java.compiler=<NA>
17/12/05 23:26:06 INFO zookeeper.ZooKeeper: Client environment:os.name=Linux
17/12/05 23:26:06 INFO zookeeper.ZooKeeper: Client environment:os.arch=amd64
17/12/05 23:26:06 INFO zookeeper.ZooKeeper: Client environment:os.version=2.6.32-573.el6.x86_64
17/12/05 23:26:06 INFO zookeeper.ZooKeeper: Client environment:user.name=cloudera
17/12/05 23:26:06 INFO zookeeper.ZooKeeper: Client environment:user.home=/home/cloudera
17/12/05 23:26:06 INFO zookeeper.ZooKeeper: Client environment:user.dir=/home/cloudera
17/12/05 23:26:06 INFO zookeeper.ZooKeeper: Initiating client connection, connectString=quickstart.cloudera:2181 sessionTimeout=60000 watcher=hconnection-0x391d28ea0x0, quorum=quickstart.cloudera:2181, baseZNode=/hbase
Exception in thread "main" java.io.IOException: java.lang.reflect.InvocationTargetException
    at org.apache.hadoop.hbase.client.ConnectionFactory.createConnection(ConnectionFactory.java:240)
    at org.apache.hadoop.hbase.client.ConnectionFactory.createConnection(ConnectionFactory.java:218)
    at org.apache.hadoop.hbase.client.ConnectionFactory.createConnection(ConnectionFactory.java:119)
    at in.thedatateam.sample.util.HbaseConnectionUtil$.createConnection(HbaseConnectionUtil.scala:24)
    at in.thedatateam.sample.sample$.main(sample.scala:25)
    at in.thedatateam.sample.sample.main(sample.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:755)
    at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
    at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:119)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.reflect.InvocationTargetException
    at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
    at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
    at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
    at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
    at org.apache.hadoop.hbase.client.ConnectionFactory.createConnection(ConnectionFactory.java:238)
    ... 14 more
Caused by: java.lang.NoClassDefFoundError: org/apache/htrace/Trace
    at org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper.exists(RecoverableZooKeeper.java:217)
    at org.apache.hadoop.hbase.zookeeper.ZKUtil.checkExists(ZKUtil.java:419)
    at org.apache.hadoop.hbase.zookeeper.ZKClusterId.readClusterIdZNode(ZKClusterId.java:65)
    at org.apache.hadoop.hbase.client.ZooKeeperRegistry.getClusterId(ZooKeeperRegistry.java:105)
    at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.retrieveClusterId(ConnectionManager.java:905)
    at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.<init>(ConnectionManager.java:648)
    ... 19 more
Caused by: java.lang.ClassNotFoundException: org.apache.htrace.Trace
    at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    ... 25 more
17/12/05 23:26:07 INFO zookeeper.ClientCnxn: Opening socket connection to server quickstart.cloudera/10.0.2.15:2181. Will not attempt to authenticate using SASL (unknown error)
17/12/05 23:26:07 INFO zookeeper.ClientCnxn: Socket connection established, initiating session, client: /10.0.2.15:53759, server: quickstart.cloudera/10.0.2.15:2181
17/12/05 23:26:07 INFO spark.SparkContext: Invoking stop() from shutdown hook
17/12/05 23:26:07 ERROR scheduler.LiveListenerBus: Listener EventLoggingListener threw an exception
java.io.IOException: All datanodes DatanodeInfoWithStorage[10.0.2.15:50010,DS-6bc44b86-0da7-406b-b1ca-5d0085dd1f0d,DISK] are bad. Aborting...
    at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.setupPipelineForAppendOrRecovery(DFSOutputStream.java:1466)
    at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.processDatanodeError(DFSOutputStream.java:1237)
    at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:722)
17/12/05 23:26:07 INFO zookeeper.ClientCnxn: Session establishment complete on server quickstart.cloudera/10.0.2.15:2181, sessionid = 0x1602a25d2c40279, negotiated timeout = 60000
17/12/05 23:26:07 INFO server.AbstractConnector: Stopped Spark@2d8f2f3a{HTTP/1.1,[http/1.1]}{0.0.0.0:4040}
17/12/05 23:26:07 INFO ui.SparkUI: Stopped Spark web UI at http://10.0.2.15:4040
17/12/05 23:26:07 ERROR util.Utils: Uncaught exception in thread Thread-2
java.lang.IllegalArgumentException: Self-suppression not permitted
    at java.lang.Throwable.addSuppressed(Throwable.java:1043)
    at java.io.FilterOutputStream.close(FilterOutputStream.java:159)
    at sun.nio.cs.StreamEncoder.implClose(StreamEncoder.java:320)
    at sun.nio.cs.StreamEncoder.close(StreamEncoder.java:149)
    at java.io.OutputStreamWriter.close(OutputStreamWriter.java:233)
    at java.io.BufferedWriter.close(BufferedWriter.java:266)
    at java.io.PrintWriter.close(PrintWriter.java:339)
    at org.apache.spark.scheduler.EventLoggingListener$$anonfun$stop$1.apply(EventLoggingListener.scala:230)
    at org.apache.spark.scheduler.EventLoggingListener$$anonfun$stop$1.apply(EventLoggingListener.scala:230)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.scheduler.EventLoggingListener.stop(EventLoggingListener.scala:230)
    at org.apache.spark.SparkContext$$anonfun$stop$7$$anonfun$apply$mcV$sp$5.apply(SparkContext.scala:1917)
    at org.apache.spark.SparkContext$$anonfun$stop$7$$anonfun$apply$mcV$sp$5.apply(SparkContext.scala:1917)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.SparkContext$$anonfun$stop$7.apply$mcV$sp(SparkContext.scala:1917)
    at org.apache.spark.util.Utils$.tryLogNonFatalError(Utils.scala:1317)
    at org.apache.spark.SparkContext.stop(SparkContext.scala:1916)
    at org.apache.spark.SparkContext$$anonfun$2.apply$mcV$sp(SparkContext.scala:581)
    at org.apache.spark.util.SparkShutdownHook.run(ShutdownHookManager.scala:216)
    at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ShutdownHookManager.scala:188)
    at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply(ShutdownHookManager.scala:188)
    at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply(ShutdownHookManager.scala:188)
    at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1954)
    at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply$mcV$sp(ShutdownHookManager.scala:188)
    at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply(ShutdownHookManager.scala:188)
    at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply(ShutdownHookManager.scala:188)
    at scala.util.Try$.apply(Try.scala:192)
    at org.apache.spark.util.SparkShutdownHookManager.runAll(ShutdownHookManager.scala:188)
    at org.apache.spark.util.SparkShutdownHookManager$$anon$2.run(ShutdownHookManager.scala:178)
    at org.apache.hadoop.util.ShutdownHookManager$1.run(ShutdownHookManager.java:54)
Caused by: java.io.IOException: All datanodes DatanodeInfoWithStorage[10.0.2.15:50010,DS-6bc44b86-0da7-406b-b1ca-5d0085dd1f0d,DISK] are bad. Aborting...
    at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.setupPipelineForAppendOrRecovery(DFSOutputStream.java:1466)
    at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.processDatanodeError(DFSOutputStream.java:1237)
    at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:722)
17/12/05 23:26:07 INFO spark.MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
17/12/05 23:26:07 INFO memory.MemoryStore: MemoryStore cleared
17/12/05 23:26:07 INFO storage.BlockManager: BlockManager stopped
17/12/05 23:26:07 INFO storage.BlockManagerMaster: BlockManagerMaster stopped
17/12/05 23:26:07 INFO scheduler.OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
17/12/05 23:26:07 INFO spark.SparkContext: Successfully stopped SparkContext
17/12/05 23:26:07 INFO util.ShutdownHookManager: Shutdown hook called
17/12/05 23:26:07 INFO util.ShutdownHookManager: Deleting directory /tmp/spark-5c1430f1-b970-4f23-8cad-1daeb1d93686
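A quick way to check whether the class named in the stack trace above is actually visible to the driver would be a small probe before the connection is created; a minimal sketch (the class name is taken from the error, everything else is illustrative):

// Minimal sketch: report whether org.apache.htrace.Trace is on the driver classpath
// before HbaseConnectionUtil attempts to create the HBase connection.
try {
  Class.forName("org.apache.htrace.Trace")
  println("org.apache.htrace.Trace is on the classpath")
} catch {
  case _: ClassNotFoundException =>
    println("org.apache.htrace.Trace is NOT on the classpath; the assembly or --jars needs htrace-core")
}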