Member since 
    
	
		
		
		04-20-2015
	
	
	
	
	
	
	
	
	
	
	
	
	
	
			
      
                3
            
            
                Posts
            
        
                1
            
            
                Kudos Received
            
        
                2
            
            
                Solutions
            
        My Accepted Solutions
| Title | Views | Posted | 
|---|---|---|
| 3848 | 10-16-2017 10:16 AM | 
			
    
	
		
		
		02-01-2018
	
		
		10:46 PM
	
	
	
	
	
	
	
	
	
	
	
	
	
	
		
	
				
		
			
					
				
		
	
		
					
							 Hi, this is my Scala code for reading HBase tables. It works with the latest HBase version, 1.1.2.2.6.4.0-91 (HDP 2.6.4, Ambari 2.6.1).  The key parameter is:  conf.set("zookeeper.znode.parent", "/hbase-unsecure")  because ZooKeeper doesn't hold the HBase master details.  Check:  # /usr/hdp/2.6.4.0-91/zookeeper/bin/zkCli.sh -server <server>.hortonworks.com:2181  # ls /hbase-unsecure/master returns []  import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Connection
import org.apache.hadoop.hbase.client.ConnectionFactory
import org.apache.hadoop.hbase.TableName
object Hbase {
  val conf: Configuration = HBaseConfiguration.create()
  /** Connects to HBase through ZooKeeper and prints the descriptor of every table.
    *
    * Mutates the shared `conf` with the client settings below, opens a connection,
    * obtains a handle to the "trading" table, then lists all tables via the admin
    * interface. The table handle, the admin and the connection are all released
    * even if one of the calls throws.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    //conf.set("hbase.master", "<server>.hortonworks.com" + ":" + "60000")
    // NOTE(review): "timeout" does not look like a standard HBase client key — confirm it has any effect.
    conf.setInt("timeout", 120000)
    conf.set("hbase.zookeeper.quorum", "<server>.hortonworks.com")
    conf.set("zookeeper.znode.parent", "/hbase-unsecure") // IMPORTANT!!!
    conf.setInt("hbase.client.scanner.caching", 10000)
    val connection: Connection = ConnectionFactory.createConnection(conf)
    try {
      val table = connection.getTable(TableName.valueOf("trading"))
      try {
        // println (not print) so the first table descriptor starts on its own line.
        println("connection created")
        val admin = connection.getAdmin
        try {
          // List the tables.
          admin.listTables().foreach(println)
        } finally {
          admin.close()
        }
      } finally {
        table.close()
      }
    } finally {
      // Always release the connection, including when listing fails.
      connection.close()
    }
  }
}  Result:  'trading', {NAME => 'ca', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'} 
						
					
					... View more
				
			
			
			
			
			
			
			
			
			
		
			
    
	
		
		
		10-16-2017
	
		
		10:16 AM
	
	
	
	
	
	
	
	
	
	
	
	
	
	
		
	
				
		
			
					
	
		1 Kudo
		
	
				
		
	
		
					
							 Hi Anirban,  All transformations in Spark are lazy, in that they do not compute their results right away. Instead, they just remember the transformations applied to some base dataset (e.g. a file). The transformations are only computed when an action requires a result to be returned to the driver program. This design enables Spark to run more efficiently. For example, we can realize that a dataset created through `map` will be used in a `reduce` and return only the result of the `reduce` to the driver, rather than the larger mapped dataset.  By default, each transformed RDD may be recomputed each time you run an action on it. However, you may also persist an RDD in memory using the `persist` (or `cache`) method, in which case Spark will keep the elements around on the cluster for much faster access the next time you query it. There is also support for persisting RDDs on disk, or replicated across multiple nodes.  More: http://spark.apache.org/docs/2.1.1/programming-guide.html  Regards,  Jan 
						
					
					... View more
				
			
			
			
			
			
			
			
			
			
		 
        






