<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: SQOOP Import to Snappy ORC in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/SQOOP-Import-to-Snappy-ORC/m-p/167923#M130253</link>
    <description>&lt;P&gt;If my Hive table is an external table located on HDFS, could this solution work? Thanks&lt;/P&gt;</description>
    <pubDate>Sat, 27 May 2017 16:28:48 GMT</pubDate>
    <dc:creator>nicksxs</dc:creator>
    <dc:date>2017-05-27T16:28:48Z</dc:date>
    <item>
      <title>SQOOP Import to Snappy ORC</title>
      <link>https://community.cloudera.com/t5/Support-Questions/SQOOP-Import-to-Snappy-ORC/m-p/167915#M130245</link>
      <description>&lt;P&gt;Can Sqoop transfer data to HDFS as Snappy-compressed ORC format? And can it then go directly into a Hive table?&lt;/P&gt;</description>
      <pubDate>Thu, 26 May 2016 02:02:03 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/SQOOP-Import-to-Snappy-ORC/m-p/167915#M130245</guid>
      <dc:creator>TimothySpann</dc:creator>
      <dc:date>2016-05-26T02:02:03Z</dc:date>
    </item>
    <item>
      <title>Re: SQOOP Import to Snappy ORC</title>
      <link>https://community.cloudera.com/t5/Support-Questions/SQOOP-Import-to-Snappy-ORC/m-p/167916#M130246</link>
      <description>&lt;A rel="user" href="https://community.cloudera.com/users/9304/tspann.html" nodeid="9304"&gt;@Timothy Spann&lt;/A&gt;&lt;P&gt;Currently Sqoop doesn't direct ORC hive table import, please refer below Jira.&lt;/P&gt;&lt;P&gt;&lt;A href="https://issues.apache.org/jira/browse/SQOOP-2192"&gt;https://issues.apache.org/jira/browse/SQOOP-2192&lt;/A&gt;&lt;/P&gt;&lt;P&gt;As a workaround, we need to import the data in a temp table with text format through sqoop and further copy the data from tmp table to ORC format table. &lt;/P&gt;</description>
      <pubDate>Thu, 26 May 2016 02:18:06 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/SQOOP-Import-to-Snappy-ORC/m-p/167916#M130246</guid>
      <dc:creator>jyadav</dc:creator>
      <dc:date>2016-05-26T02:18:06Z</dc:date>
    </item>
    <item>
      <title>Re: SQOOP Import to Snappy ORC</title>
      <link>https://community.cloudera.com/t5/Support-Questions/SQOOP-Import-to-Snappy-ORC/m-p/167917#M130247</link>
      <description>&lt;P&gt;Thanks, I was afraid that was the case.&lt;/P&gt;</description>
      <pubDate>Thu, 26 May 2016 02:23:14 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/SQOOP-Import-to-Snappy-ORC/m-p/167917#M130247</guid>
      <dc:creator>TimothySpann</dc:creator>
      <dc:date>2016-05-26T02:23:14Z</dc:date>
    </item>
    <item>
      <title>Re: SQOOP Import to Snappy ORC</title>
      <link>https://community.cloudera.com/t5/Support-Questions/SQOOP-Import-to-Snappy-ORC/m-p/167918#M130248</link>
      <description>&lt;P&gt;This is not true. Sqoop can directly import into a snappy compressed ORC table using HCatalog. Refer to my answer on how to do this. &lt;/P&gt;</description>
      <pubDate>Thu, 26 May 2016 04:59:21 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/SQOOP-Import-to-Snappy-ORC/m-p/167918#M130248</guid>
      <dc:creator>ravi1</dc:creator>
      <dc:date>2016-05-26T04:59:21Z</dc:date>
    </item>
    <item>
      <title>Re: SQOOP Import to Snappy ORC</title>
      <link>https://community.cloudera.com/t5/Support-Questions/SQOOP-Import-to-Snappy-ORC/m-p/167919#M130249</link>
      <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/9304/tspann.html" nodeid="9304"&gt;@Timothy Spann&lt;/A&gt; Here is an example of sqooping data in compressed ORC format (without needing an intermediate table). I see that this is not well documented. &lt;/P&gt;&lt;PRE&gt;sqoop import --connect jdbc:mysql://localhost/employees --username hive --password hive --table departments --hcatalog-database default --hcatalog-table my_table_orc --create-hcatalog-table --hcatalog-storage-stanza "stored as orcfile"&lt;/PRE&gt;&lt;P&gt;This example uses the default ORC compression. If you want Snappy, you can create the table in advance with its table property set to Snappy compression and then leave out '--create-hcatalog-table'; see the sketch below.&lt;/P&gt;&lt;P&gt;More details are in this thread. &lt;/P&gt;&lt;P&gt;&lt;A href="https://community.hortonworks.com/questions/28060/can-sqoop-be-used-to-directly-import-data-into-an.html" target="_blank"&gt;https://community.hortonworks.com/questions/28060/can-sqoop-be-used-to-directly-import-data-into-an.html&lt;/A&gt;&lt;/P&gt;
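&lt;P&gt;A minimal sketch of the Snappy variant (the column definitions for 'departments' are assumed here for illustration):&lt;/P&gt;&lt;PRE&gt;-- In Hive: pre-create the target table with Snappy compression
CREATE TABLE my_table_orc (dept_no VARCHAR(4), dept_name VARCHAR(40))
STORED AS ORC TBLPROPERTIES ('orc.compress'='SNAPPY');

# Then run the same import, without --create-hcatalog-table or a storage stanza
sqoop import --connect jdbc:mysql://localhost/employees --username hive -P \
  --table departments --hcatalog-database default --hcatalog-table my_table_orc&lt;/PRE&gt;&lt;/description&gt;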
      <pubDate>Thu, 26 May 2016 04:59:49 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/SQOOP-Import-to-Snappy-ORC/m-p/167919#M130249</guid>
      <dc:creator>ravi1</dc:creator>
      <dc:date>2016-05-26T04:59:49Z</dc:date>
    </item>
    <item>
      <title>Re: SQOOP Import to Snappy ORC</title>
      <link>https://community.cloudera.com/t5/Support-Questions/SQOOP-Import-to-Snappy-ORC/m-p/167920#M130250</link>
      <description>&lt;P&gt;I agree with &lt;A rel="user" href="https://community.cloudera.com/users/216/ravi.html" nodeid="216"&gt;@Ravi Mutyala&lt;/A&gt;, so I have unaccepted the answer so the right answer can take its place.&lt;/P&gt;</description>
      <pubDate>Thu, 26 May 2016 12:59:08 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/SQOOP-Import-to-Snappy-ORC/m-p/167920#M130250</guid>
      <dc:creator>pardeep_kumar</dc:creator>
      <dc:date>2016-05-26T12:59:08Z</dc:date>
    </item>
    <item>
      <title>Re: SQOOP Import to Snappy ORC</title>
      <link>https://community.cloudera.com/t5/Support-Questions/SQOOP-Import-to-Snappy-ORC/m-p/167921#M130251</link>
      <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/9304/tspann.html" nodeid="9304"&gt;@Timothy Spann&lt;/A&gt; Here is a quick and dirty way to do it. Had some time, so tried &lt;A rel="user" href="https://community.cloudera.com/users/216/ravi.html" nodeid="216"&gt;@Ravi Mutyala&lt;/A&gt;'s suggestion and it works :).&lt;/P&gt;&lt;PRE&gt;hive&amp;gt; create table test_orc_sqoop (name varchar(20)) ;
OK
Time taken: 0.729 seconds
hive&amp;gt; select * from test_orc_sqoop;
OK
Time taken: 0.248 seconds
hive&amp;gt; exit;


[hdfs@test-mon-wmt ~]$ sqoop import --connect jdbc:mysql://mon-WMT-upgrade.cloud.hortonworks.com/test --username test --password hadoop --table test --hcatalog-database default --hcatalog-table test_orc_sqoop --hcatalog-storage-stanza "stored as orcfile" -m 1
Warning: /usr/hdp/2.4.2.0-258/hbase does not exist! HBase imports will fail.
Please set $HBASE_HOME to the root of your HBase installation.
Warning: /usr/hdp/2.4.2.0-258/accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
16/05/26 05:56:03 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6.2.4.2.0-258
16/05/26 05:56:03 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
16/05/26 05:56:04 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
16/05/26 05:56:04 INFO tool.CodeGenTool: Beginning code generation
16/05/26 05:56:05 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `test` AS t LIMIT 1
16/05/26 05:56:05 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `test` AS t LIMIT 1
16/05/26 05:56:05 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /usr/hdp/2.4.2.0-258/hadoop-mapreduce
Note: /tmp/sqoop-hdfs/compile/64f04ad998cebf113bf8ec1efdbf6b95/test.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
16/05/26 05:56:10 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-hdfs/compile/64f04ad998cebf113bf8ec1efdbf6b95/test.jar
16/05/26 05:56:10 WARN manager.MySQLManager: It looks like you are importing from mysql.
16/05/26 05:56:10 WARN manager.MySQLManager: This transfer can be faster! Use the --direct
16/05/26 05:56:10 WARN manager.MySQLManager: option to exercise a MySQL-specific fast path.
16/05/26 05:56:10 INFO manager.MySQLManager: Setting zero DATETIME behavior to convertToNull (mysql)
16/05/26 05:56:10 INFO mapreduce.ImportJobBase: Beginning import of test
16/05/26 05:56:11 INFO hcat.SqoopHCatUtilities: Configuring HCatalog for import job
16/05/26 05:56:11 INFO hcat.SqoopHCatUtilities: Configuring HCatalog specific details for job
16/05/26 05:56:11 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `test` AS t LIMIT 1
16/05/26 05:56:11 INFO hcat.SqoopHCatUtilities: Database column names projected : [name]
16/05/26 05:56:11 INFO hcat.SqoopHCatUtilities: Database column name - info map :
name : [Type : 12,Precision : 20,Scale : 0]
16/05/26 05:56:12 INFO hive.metastore: Trying to connect to metastore with URI thrift://rm-wmt-upgrade.cloud.hortonworks.com:9083
16/05/26 05:56:12 INFO hive.metastore: Connected to metastore.
16/05/26 05:56:14 INFO hcat.SqoopHCatUtilities: HCatalog full table schema fields = [name]
16/05/26 05:56:18 INFO hcat.SqoopHCatUtilities: HCatalog table partitioning key fields = []
16/05/26 05:56:18 INFO hcat.SqoopHCatUtilities: HCatalog projected schema fields = [name]
16/05/26 05:56:18 INFO hcat.SqoopHCatUtilities: HCatalog job : Hive Home = /usr/hdp/current/hive-client
16/05/26 05:56:18 INFO hcat.SqoopHCatUtilities: HCatalog job:  HCatalog Home = /usr/hdp/2.4.2.0-258//sqoop/../hive-hcatalog
16/05/26 05:56:18 INFO hcat.SqoopHCatUtilities: Adding jar files under /usr/hdp/2.4.2.0-258//sqoop/../hive-hcatalog/share/hcatalog to distributed cache
..............

...........
16/05/26 05:56:18 INFO hcat.SqoopHCatUtilities: Adding jar files under /usr/hdp/2.4.2.0-258//sqoop/../hive-hcatalog/share/hcatalog/storage-handlers to distributed cache (recursively)
16/05/26 05:56:18 WARN hcat.SqoopHCatUtilities: No files under /usr/hdp/2.4.2.0-258/sqoop/../hive-hcatalog/share/hcatalog/storage-handlers to add to distributed cache for hcatalog job
16/05/26 05:56:18 INFO hcat.SqoopHCatUtilities: Validating dynamic partition keys
16/05/26 05:56:18 WARN hcat.SqoopHCatUtilities: The HCatalog field name has type varchar(20).  Expected = varchar based on database column type : VARCHAR
16/05/26 05:56:18 WARN hcat.SqoopHCatUtilities: The Sqoop job can fail if types are not  assignment compatible
16/05/26 05:56:18 INFO mapreduce.DataDrivenImportJob: Configuring mapper for HCatalog import job
16/05/26 05:56:20 INFO impl.TimelineClientImpl: Timeline service address: http://rm-wmt-upgrade.cloud.hortonworks.com:8188/ws/v1/timeline/
16/05/26 05:56:21 INFO client.ConfiguredRMFailoverProxyProvider: Failing over to rm2
16/05/26 05:57:07 INFO db.DBInputFormat: Using read commited transaction isolation
16/05/26 05:57:07 INFO mapreduce.JobSubmitter: number of splits:1
16/05/26 05:57:07 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1464036504999_0008
16/05/26 05:57:11 INFO impl.YarnClientImpl: Submitted application application_1464036504999_0008
16/05/26 05:57:11 INFO mapreduce.Job: The url to track the job: http://nn1-wmt-upgrade.cloud.hortonworks.com:8088/proxy/application_1464036504999_0008/
16/05/26 05:57:11 INFO mapreduce.Job: Running job: job_1464036504999_0008
16/05/26 05:57:56 INFO mapreduce.Job: Job job_1464036504999_0008 running in uber mode : false
16/05/26 05:57:56 INFO mapreduce.Job:  map 0% reduce 0%
16/05/26 05:58:25 INFO mapreduce.Job:  map 100% reduce 0%
16/05/26 05:58:27 INFO mapreduce.Job: Job job_1464036504999_0008 completed successfully
16/05/26 05:58:27 INFO mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=0
FILE: Number of bytes written=302189
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=87
HDFS: Number of bytes written=22
HDFS: Number of read operations=4
HDFS: Number of large read operations=0
HDFS: Number of write operations=2
Job Counters
Launched map tasks=1
Other local map tasks=1
Total time spent by all maps in occupied slots (ms)=17899
Total time spent by all reduces in occupied slots (ms)=0
Total time spent by all map tasks (ms)=17899
Total vcore-seconds taken by all map tasks=17899
Total megabyte-seconds taken by all map tasks=12207118
Map-Reduce Framework
Map input records=3
Map output records=3
Input split bytes=87
Spilled Records=0
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=189
CPU time spent (ms)=2610
Physical memory (bytes) snapshot=172310528
Virtual memory (bytes) snapshot=2470862848
Total committed heap usage (bytes)=58130432
File Input Format Counters
Bytes Read=0
File Output Format Counters
Bytes Written=0
16/05/26 05:58:27 INFO mapreduce.ImportJobBase: Transferred 22 bytes in 129.1108 seconds (0.1704 bytes/sec)
16/05/26 05:58:27 INFO mapreduce.ImportJobBase: Retrieved 3 records.


[hdfs@test-mon-wmt ~]$ hive
WARNING: Use "yarn jar" to launch YARN applications.
Logging initialized using configuration in file:/etc/hive/2.4.2.0-258/0/hive-log4j.properties
hive&amp;gt; select * from test_orc_sqoop;
OK
abcd
abcdef
abcdefghi
Time taken: 4.31 seconds, Fetched: 3 row(s)&lt;/PRE&gt;</description>
      <pubDate>Thu, 26 May 2016 13:03:51 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/SQOOP-Import-to-Snappy-ORC/m-p/167921#M130251</guid>
      <dc:creator>pardeep_kumar</dc:creator>
      <dc:date>2016-05-26T13:03:51Z</dc:date>
    </item>
    <item>
      <title>Re: SQOOP Import to Snappy ORC</title>
      <link>https://community.cloudera.com/t5/Support-Questions/SQOOP-Import-to-Snappy-ORC/m-p/167922#M130252</link>
      <description>&lt;P&gt;For version 1.4.5, it is not possible to import data from an RDBMS into an ORC Hive table directly; you need to use the workaround mentioned by &lt;A href="https://community.hortonworks.com/users/9304/tspann.html"&gt;@Timothy Spann&lt;/A&gt;.&lt;/P&gt;</description>
      <pubDate>Tue, 25 Apr 2017 02:59:05 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/SQOOP-Import-to-Snappy-ORC/m-p/167922#M130252</guid>
      <dc:creator>rodrigorondena1</dc:creator>
      <dc:date>2017-04-25T02:59:05Z</dc:date>
    </item>
    <item>
      <title>Re: SQOOP Import to Snappy ORC</title>
      <link>https://community.cloudera.com/t5/Support-Questions/SQOOP-Import-to-Snappy-ORC/m-p/167923#M130253</link>
      <description>&lt;P&gt;If my Hive table is an external table located on HDFS, could this solution work? Thanks&lt;/P&gt;</description>
      <pubDate>Sat, 27 May 2017 16:28:48 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/SQOOP-Import-to-Snappy-ORC/m-p/167923#M130253</guid>
      <dc:creator>nicksxs</dc:creator>
      <dc:date>2017-05-27T16:28:48Z</dc:date>
    </item>
  </channel>
</rss>