<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: HDP Spark Hbase Connector Cell Versions? in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/HDP-Spark-Hbase-Connector-Cell-Versions/m-p/131521#M51688</link>
    <description>&lt;P&gt;Yes, you can specify which cell version to get. SHC users can select a timestamp, they can also select a time range with minimum timestamp and maximum timestamp (aka. retrieve multiple versions simultaneously). Please refer to the test case &lt;A href="https://github.com/hortonworks-spark/shc/blob/master/core/src/test/scala/org/apache/spark/sql/DefaultSourceSuite.scala#L269"&gt;here&lt;/A&gt; about how to do it.&lt;/P&gt;</description>
    <pubDate>Wed, 18 Jan 2017 04:36:28 GMT</pubDate>
    <dc:creator>wyang</dc:creator>
    <dc:date>2017-01-18T04:36:28Z</dc:date>
    <item>
      <title>HDP Spark Hbase Connector Cell Versions?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/HDP-Spark-Hbase-Connector-Cell-Versions/m-p/131517#M51684</link>
      <description>&lt;P&gt;When reading an hbase table into a dataframe, is there a way to specify which cell version to get? Or will this always be the most recent?&lt;/P&gt;</description>
      <pubDate>Tue, 17 Jan 2017 11:48:02 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/HDP-Spark-Hbase-Connector-Cell-Versions/m-p/131517#M51684</guid>
      <dc:creator>todd_niven</dc:creator>
      <dc:date>2017-01-17T11:48:02Z</dc:date>
    </item>
    <item>
      <title>Re: HDP Spark Hbase Connector Cell Versions?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/HDP-Spark-Hbase-Connector-Cell-Versions/m-p/131518#M51685</link>
      <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/15450/toddniven.html" nodeid="15450"&gt;@Todd Niven&lt;/A&gt;&lt;/P&gt;&lt;P&gt;In your configuration, set the following and then use &lt;A href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Result.html#getColumnCells-byte:A-byte:A-"&gt;getColumnCells&lt;/A&gt; to get the version you want. Familiarize with &lt;A href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Result.html"&gt;Result.java&lt;/A&gt; from hbase client API which is probably what you are using.&lt;/P&gt;&lt;PRE&gt;conf.set("hbase.mapreduce.scan.maxversions", "VERSION_YOU_WANT")&lt;/PRE&gt;</description>
      <pubDate>Tue, 17 Jan 2017 13:38:02 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/HDP-Spark-Hbase-Connector-Cell-Versions/m-p/131518#M51685</guid>
      <dc:creator>mqureshi</dc:creator>
      <dc:date>2017-01-17T13:38:02Z</dc:date>
    </item>
    <item>
      <title>Re: HDP Spark Hbase Connector Cell Versions?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/HDP-Spark-Hbase-Connector-Cell-Versions/m-p/131519#M51686</link>
      <description>&lt;P&gt;Does this approach work with &lt;A href="https://github.com/hortonworks-spark/shc" target="_blank"&gt;https://github.com/hortonworks-spark/shc&lt;/A&gt; ? I am hoping to retrieve multiple versions simultaneously.&lt;/P&gt;</description>
      <pubDate>Tue, 17 Jan 2017 13:47:14 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/HDP-Spark-Hbase-Connector-Cell-Versions/m-p/131519#M51686</guid>
      <dc:creator>todd_niven</dc:creator>
      <dc:date>2017-01-17T13:47:14Z</dc:date>
    </item>
    <item>
      <title>Re: HDP Spark Hbase Connector Cell Versions?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/HDP-Spark-Hbase-Connector-Cell-Versions/m-p/131520#M51687</link>
      <description>&lt;P&gt;If this does not work for you please open the feature request by creating an issue on the github project for SHC. /cc &lt;A rel="user" href="https://community.cloudera.com/users/3028/wyang.html" nodeid="3028"&gt;@wyang&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 18 Jan 2017 04:23:02 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/HDP-Spark-Hbase-Connector-Cell-Versions/m-p/131520#M51687</guid>
      <dc:creator>bikas</dc:creator>
      <dc:date>2017-01-18T04:23:02Z</dc:date>
    </item>
    <item>
      <title>Re: HDP Spark Hbase Connector Cell Versions?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/HDP-Spark-Hbase-Connector-Cell-Versions/m-p/131521#M51688</link>
      <description>&lt;P&gt;Yes, you can specify which cell version to get. SHC users can select a timestamp, they can also select a time range with minimum timestamp and maximum timestamp (aka. retrieve multiple versions simultaneously). Please refer to the test case &lt;A href="https://github.com/hortonworks-spark/shc/blob/master/core/src/test/scala/org/apache/spark/sql/DefaultSourceSuite.scala#L269"&gt;here&lt;/A&gt; about how to do it.&lt;/P&gt;</description>
      <pubDate>Wed, 18 Jan 2017 04:36:28 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/HDP-Spark-Hbase-Connector-Cell-Versions/m-p/131521#M51688</guid>
      <dc:creator>wyang</dc:creator>
      <dc:date>2017-01-18T04:36:28Z</dc:date>
    </item>
    <item>
      <title>Re: HDP Spark Hbase Connector Cell Versions?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/HDP-Spark-Hbase-Connector-Cell-Versions/m-p/131522#M51689</link>
      <description>&lt;P&gt;Yes. I think it should. I have not done it specifically but I have used Result.java class so it should work as it is the same class. Here is how I have done it.&lt;/P&gt;&lt;PRE&gt;  // create hbase configuration

        Configuration configuration = HBaseConfiguration.create();

        configuration.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

        configuration.set(TableInputFormat.INPUT_TABLE, hbaseTableName);




        // create java hbase context

        JavaHBaseContext javaHBaseContext = new JavaHBaseContext(javaSparkContext, configuration);




        JavaPairRDD&amp;lt;ImmutableBytesWritable, Result&amp;gt; hbaseRDD =

                javaSparkContext.newAPIHadoopRDD(configuration, TableInputFormat.class, ImmutableBytesWritable.class, Result.class);




        JavaRDD&amp;lt;Row&amp;gt; rowJavaRDD = hbaseRDD.map(new Function&amp;lt;Tuple2&amp;lt;ImmutableBytesWritable, Result&amp;gt;, Row  &amp;gt;() {

            private static final long serialVersionUID = -2021713021648730786L;

            public Row  call(Tuple2&amp;lt;ImmutableBytesWritable, Result&amp;gt; tuple) throws Exception {




                Object[] rowObject = new Object[namearr.length];




                for (int i=0; i&amp;lt;namearr.length; i++) {

                    Result result = tuple._2;
                    // handle each data type we support
                    if (typesarr[i].equals("string")) {
                        String str = Bytes.toString(result.getValue(Bytes.toBytes(cfarr[i]), Bytes.toBytes(namearr[i])));
                        rowObject[i] = str;
                    }

                }
&lt;/PRE&gt;</description>
      <pubDate>Wed, 18 Jan 2017 05:10:47 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/HDP-Spark-Hbase-Connector-Cell-Versions/m-p/131522#M51689</guid>
      <dc:creator>mqureshi</dc:creator>
      <dc:date>2017-01-18T05:10:47Z</dc:date>
    </item>
  </channel>
</rss>

