This is the code:

ubuntu@ip-10-0-2-24:~$ spark-shell --packages com.hortonworks:shc-core:1.1.0-2.1-s_2.11 --repositories http://repo.hortonworks.com/content/groups/public/

scala> import org.apache.spark.sql.{SQLContext, _}
import org.apache.spark.sql.{SQLContext, _}

scala> import org.apache.spark.sql.execution.datasources.hbase._
import org.apache.spark.sql.execution.datasources.hbase._

scala> import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.{SparkConf, SparkContext}

scala> import spark.sqlContext.implicits._
import spark.sqlContext.implicits._

scala> def catalog = s"""{
| |"table":{"namespace":"default", "name":"Contacts"},
| |"rowkey":"key",
| |"columns":{
| |"rowkey":{"cf":"rowkey", "col":"key", "type":"string"},
| |"officeAddress":{"cf":"Office", "col":"Address", "type":"string"},
| |"officePhone":{"cf":"Office", "col":"Phone", "type":"string"},
| |"personalName":{"cf":"Personal", "col":"Name", "type":"string"},
| |"personalPhone":{"cf":"Personal", "col":"Phone", "type":"string"}
| |}
| |}""".stripMargin
catalog: String

scala> def withCatalog(cat: String): DataFrame = {
| spark.sqlContext
| .read
| .options(Map(HBaseTableCatalog.tableCatalog->cat))
| .format("org.apache.spark.sql.execution.datasources.hbase")
| .load()
| }
withCatalog: (cat: String)org.apache.spark.sql.DataFrame

scala> val df = withCatalog(catalog)
df: org.apache.spark.sql.DataFrame = [rowkey: string, officeAddress: string ... 3 more fields]

This is the error:

scala> df.show
java.lang.RuntimeException: java.lang.NullPointerException
at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithoutRetries(RpcRetryingCaller.java:208)
at org.apache.hadoop.hbase.client.ClientScanner.call(ClientScanner.java:320)
at org.apache.hadoop.hbase.client.ClientScanner.nextScanner(ClientScanner.java:295)
at org.apache.hadoop.hbase.client.ClientScanner.initializeScannerInConstruction(ClientScanner.java:160)
at org.apache.hadoop.hbase.client.ClientScanner.<init>(ClientScanner.java:155)
at org.apache.hadoop.hbase.client.HTable.getScanner(HTable.java:821)
at org.apache.hadoop.hbase.client.MetaScanner.metaScan(MetaScanner.java:193)
at org.apache.hadoop.hbase.client.MetaScanner.metaScan(MetaScanner.java:89)
at org.apache.hadoop.hbase.client.MetaScanner.listTableRegionLocations(MetaScanner.java:343)
at org.apache.hadoop.hbase.client.HRegionLocator.listRegionLocations(HRegionLocator.java:142)
at org.apache.hadoop.hbase.client.HRegionLocator.getStartEndKeys(HRegionLocator.java:118)
at org.apache.spark.sql.execution.datasources.hbase.RegionResource$$anonfun$1.apply(HBaseResources.scala:109)
at org.apache.spark.sql.execution.datasources.hbase.RegionResource$$anonfun$1.apply(HBaseResources.scala:108)
at org.apache.spark.sql.execution.datasources.hbase.ReferencedResource$class.releaseOnException(HBaseResources.scala:77)
at org.apache.spark.sql.execution.datasources.hbase.RegionResource.releaseOnException(HBaseResources.scala:88)
at org.apache.spark.sql.execution.datasources.hbase.RegionResource.<init>(HBaseResources.scala:108)
at org.apache.spark.sql.execution.datasources.hbase.HBaseTableScanRDD.getPartitions(HBaseTableScan.scala:61)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:314)
at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:2861)
at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2150)
at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2150)
at org.apache.spark.sql.Dataset$$anonfun$55.apply(Dataset.scala:2842)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:2841)
at org.apache.spark.sql.Dataset.head(Dataset.scala:2150)
at org.apache.spark.sql.Dataset.take(Dataset.scala:2363)
at org.apache.spark.sql.Dataset.showString(Dataset.scala:241)
at org.apache.spark.sql.Dataset.show(Dataset.scala:637)
at org.apache.spark.sql.Dataset.show(Dataset.scala:596)
at org.apache.spark.sql.Dataset.show(Dataset.scala:605)
... 54 elided
Caused by: java.lang.NullPointerException
at org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.getMetaReplicaNodes(ZooKeeperWatcher.java:395)
at org.apache.hadoop.hbase.zookeeper.MetaTableLocator.blockUntilAvailable(MetaTableLocator.java:553)
at org.apache.hadoop.hbase.client.ZooKeeperRegistry.getMetaRegionLocation(ZooKeeperRegistry.java:61)
at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateMeta(ConnectionManager.java:1185)
at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateRegion(ConnectionManager.java:1152)
at org.apache.hadoop.hbase.client.RpcRetryingCallerWithReadReplicas.getRegionLocations(RpcRetryingCallerWithReadReplicas.java:300)
at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas.call(ScannerCallableWithReplicas.java:151)
at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas.call(ScannerCallableWithReplicas.java:59)
at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithoutRetries(RpcRetryingCaller.java:200)
... 103 more
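
In case it is relevant: from what I understand, a NullPointerException inside ZooKeeperWatcher.getMetaReplicaNodes usually means the HBase client never picked up a usable hbase-site.xml, so it cannot resolve the ZooKeeper quorum and znode parent for the hbase:meta lookup. Below is a sketch of how I would relaunch the shell with the client configuration visible to both the driver and the executors; the /etc/hbase/conf path is an assumption based on a typical HDP layout, not something I have confirmed on this cluster:

# --files ships hbase-site.xml to the executors;
# --driver-class-path lets the driver-side region/meta lookups see it too
# (the /etc/hbase/conf path is an assumption for my cluster layout)
spark-shell \
  --packages com.hortonworks:shc-core:1.1.0-2.1-s_2.11 \
  --repositories http://repo.hortonworks.com/content/groups/public/ \
  --files /etc/hbase/conf/hbase-site.xml \
  --driver-class-path /etc/hbase/conf

Is something like this required here, or should the connector pick up the HBase configuration on its own?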