<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>Suggestions for Bulk Loading Large Files into HBase Tables in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Suggestions-for-Bulk-Loading-Large-Files-into-HBase-Tables/m-p/393197#M248369</link>
    <description>&lt;P&gt;Dear Cloudera Community,&lt;/P&gt;&lt;P&gt;We are currently conducting warehouse testing using Apache HBase and need to load large files into HBase tables. Could you kindly suggest any tools or methods specifically designed for bulk loading large datasets into HBase?&lt;/P&gt;&lt;P&gt;Thank You!&lt;/P&gt;</description>
    <pubDate>Mon, 09 Sep 2024 05:21:47 GMT</pubDate>
    <dc:creator>Amandi</dc:creator>
    <dc:date>2024-09-09T05:21:47Z</dc:date>
    <item>
      <title>Suggestions for Bulk Loading Large Files into HBase Tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Suggestions-for-Bulk-Loading-Large-Files-into-HBase-Tables/m-p/393197#M248369</link>
      <description>&lt;P&gt;Dear Cloudera Community,&lt;/P&gt;&lt;P&gt;We are currently conducting warehouse testing using Apache HBase and need to load large files into HBase tables. Could you kindly suggest any tools or methods specifically designed for bulk loading large datasets into HBase?&lt;/P&gt;&lt;P&gt;Thank You!&lt;/P&gt;</description>
      <pubDate>Mon, 09 Sep 2024 05:21:47 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Suggestions-for-Bulk-Loading-Large-Files-into-HBase-Tables/m-p/393197#M248369</guid>
      <dc:creator>Amandi</dc:creator>
      <dc:date>2024-09-09T05:21:47Z</dc:date>
    </item>
    <item>
      <title>Re: Suggestions for Bulk Loading Large Files into HBase Tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Suggestions-for-Bulk-Loading-Large-Files-into-HBase-Tables/m-p/393211#M248376</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/115535"&gt;@Amandi&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Check below blog to know how to use bulk loading in hbase with examples -&lt;/P&gt;&lt;P&gt;&lt;A href="https://blog.cloudera.com/how-to-use-hbase-bulk-loading-and-why/" target="_blank"&gt;https://blog.cloudera.com/how-to-use-hbase-bulk-loading-and-why/&lt;/A&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;&lt;EM&gt;Was your question answered? Please take some time to click on "Accept as Solution" -- If you find a reply useful, say thanks by clicking on the thumbs up button below this post. &lt;/EM&gt;&lt;/STRONG&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 09 Sep 2024 10:02:16 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Suggestions-for-Bulk-Loading-Large-Files-into-HBase-Tables/m-p/393211#M248376</guid>
      <dc:creator>shubham_sharma</dc:creator>
      <dc:date>2024-09-09T10:02:16Z</dc:date>
    </item>
    <item>
      <title>Re: Suggestions for Bulk Loading Large Files into HBase Tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Suggestions-for-Bulk-Loading-Large-Files-into-HBase-Tables/m-p/393356#M248428</link>
      <description>&lt;P&gt;hello shubham,&lt;BR /&gt;&lt;BR /&gt;I am encountering errors while running the following command:&lt;/P&gt;&lt;P&gt;&amp;nbsp;"/home/super/hbase/bin/hbase org.apache.hadoop.hbase.mapreduce.ImportTsv -Dimporttsv.columns=HBASE_ROW_KEY,Name,Age,Gender my_1 /hbase/test2.txt"&lt;BR /&gt;&lt;BR /&gt;The error message is:&lt;/P&gt;&lt;P&gt;2024-09-11 15:53:44,808 INFO [main] impl.YarnClientImpl: Submitted application application_1725184331906_0018&lt;BR /&gt;2024-09-11 15:53:44,847 INFO [main] mapreduce.Job: The url to track the job: &lt;A href="http://dc1-apache-hbase.mobitel.lk:8088/proxy/application_1725184331906_0018/" target="_blank"&gt;http://dc1-apache-hbase.mobitel.lk:8088/proxy/application_1725184331906_0018/&lt;/A&gt;&lt;BR /&gt;2024-09-11 15:53:44,848 INFO [main] mapreduce.Job: Running job: job_1725184331906_0018&lt;BR /&gt;2024-09-11 15:53:52,941 INFO [main] mapreduce.Job: Job job_1725184331906_0018 running in uber mode : false&lt;BR /&gt;2024-09-11 15:53:52,943 INFO [main] mapreduce.Job: map 0% reduce 0%&lt;BR /&gt;2024-09-11 15:53:52,952 INFO [main] mapreduce.Job: 2]Container exited with a non-zero exit code 1. Error file: prelaunch.err.&lt;BR /&gt;Last 4096 bytes of prelaunch.err :&lt;BR /&gt;Last 4096 bytes of stderr :&lt;BR /&gt;log4j:WARN No appenders could be found for logger (org.apache.hadoop.mapreduce.v2.app.MRAppMaster).&lt;BR /&gt;log4j:WARN Please initialize the log4j system properly.&lt;BR /&gt;log4j:WARN See &lt;A href="http://logging.apache.org/log4j/1.2/faq.html#noconfig" target="_blank"&gt;http://logging.apache.org/log4j/1.2/faq.html#noconfig&lt;/A&gt; for more info.&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;[2024-09-11 15:53:51.942]Container exited with a non-zero exit code 1. 
Error file: prelaunch.err.&lt;BR /&gt;Last 4096 bytes of prelaunch.err :&lt;BR /&gt;Last 4096 bytes of stderr :&lt;BR /&gt;log4j:WARN No appenders could be found for logger (org.apache.hadoop.mapreduce.v2.app.MRAppMaster).&lt;BR /&gt;log4j:WARN Please initialize the log4j system properly.&lt;BR /&gt;log4j:WARN See &lt;A href="http://logging.apache.org/log4j/1.2/faq.html#noconfig" target="_blank"&gt;http://logging.apache.org/log4j/1.2/faq.html#noconfig&lt;/A&gt; for more info.&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;For more detailed output, check the application tracking page: http://___________________________________________/cluster/app/application_1725184331906_0018 Then click on links to logs of each attempt.&lt;BR /&gt;. Failing the application.&lt;BR /&gt;2024-09-11 15:53:52,967 INFO [main] mapreduce.Job: Counters: 0&lt;BR /&gt;[super@dc1-apache-hbase mapreduce-job]$&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN class="hljs-string"&gt;What&lt;/SPAN&gt; &lt;SPAN class="hljs-string"&gt;are&lt;/SPAN&gt; &lt;SPAN class="hljs-string"&gt;the&lt;/SPAN&gt; &lt;SPAN class="hljs-string"&gt;possible&lt;/SPAN&gt; &lt;SPAN class="hljs-string"&gt;solutions,&lt;/SPAN&gt; &lt;SPAN class="hljs-string"&gt;and&lt;/SPAN&gt; &lt;SPAN class="hljs-string"&gt;how&lt;/SPAN&gt; &lt;SPAN class="hljs-string"&gt;can&lt;/SPAN&gt; &lt;SPAN class="hljs-string"&gt;I&lt;/SPAN&gt; &lt;SPAN class="hljs-string"&gt;fix&lt;/SPAN&gt; &lt;SPAN class="hljs-string"&gt;this?&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 11 Sep 2024 10:37:01 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Suggestions-for-Bulk-Loading-Large-Files-into-HBase-Tables/m-p/393356#M248428</guid>
      <dc:creator>Amandi</dc:creator>
      <dc:date>2024-09-11T10:37:01Z</dc:date>
    </item>
    <item>
      <title>Re: Suggestions for Bulk Loading Large Files into HBase Tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Suggestions-for-Bulk-Loading-Large-Files-into-HBase-Tables/m-p/393365#M248429</link>
      <description>&lt;P&gt;&lt;SPAN&gt;Hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/115535"&gt;@Amandi&lt;/a&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;The failure seems to be when launching containers in the pre-launch stage.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;Since the ImportTsv&amp;nbsp;job uses MR and yarn as underlying services for running jobs, could you please confirm that a simple MR pi job from a yarn gateway node is able to run without any issues: &lt;/SPAN&gt;&lt;/P&gt;&lt;PRE&gt;&lt;SPAN&gt;# hadoop jar /opt/cloudera/parcels/CDH/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar pi 10 100&lt;/SPAN&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 11 Sep 2024 11:25:04 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Suggestions-for-Bulk-Loading-Large-Files-into-HBase-Tables/m-p/393365#M248429</guid>
      <dc:creator>shubham_sharma</dc:creator>
      <dc:date>2024-09-11T11:25:04Z</dc:date>
    </item>
    <item>
      <title>Re: Suggestions for Bulk Loading Large Files into HBase Tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Suggestions-for-Bulk-Loading-Large-Files-into-HBase-Tables/m-p/393396#M248439</link>
      <description>&lt;P&gt;hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/80101"&gt;@shubham_sharma&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;I ran a simple job as you requested, and it seems to have run without any issues. For your reference, I am attaching the output as well.&lt;/P&gt;&lt;P&gt;Output:&lt;/P&gt;&lt;P&gt;[super@dc1-apache-hbase mapreduce]$ /home/super/hadoop/bin/hadoop jar /home/super/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.4.0.jar pi 10 100&lt;BR /&gt;Number of Maps = 10&lt;BR /&gt;Samples per Map = 100&lt;BR /&gt;2024-09-12 08:25:59,664 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable&lt;BR /&gt;Wrote input for Map #0&lt;BR /&gt;Wrote input for Map #1&lt;BR /&gt;Wrote input for Map #2&lt;BR /&gt;Wrote input for Map #3&lt;BR /&gt;Wrote input for Map #4&lt;BR /&gt;Wrote input for Map #5&lt;BR /&gt;Wrote input for Map #6&lt;BR /&gt;Wrote input for Map #7&lt;BR /&gt;Wrote input for Map #8&lt;BR /&gt;Wrote input for Map #9&lt;BR /&gt;Starting Job&lt;BR /&gt;2024-09-12 08:26:01,082 INFO client.DefaultNoHARMFailoverProxyProvider: Connecting to ResourceManager at /0.0.0.0:8032&lt;BR /&gt;2024-09-12 08:26:01,580 INFO mapreduce.JobResourceUploader: Disabling Erasure Coding for path: /tmp/hadoop-yarn/staging/super/.staging/job_1725184331906_0019&lt;BR /&gt;2024-09-12 08:26:01,730 INFO input.FileInputFormat: Total input files to process : 10&lt;BR /&gt;2024-09-12 08:26:01,775 INFO mapreduce.JobSubmitter: number of splits:10&lt;BR /&gt;2024-09-12 08:26:01,931 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1725184331906_0019&lt;BR /&gt;2024-09-12 08:26:01,931 INFO mapreduce.JobSubmitter: Executing with tokens: []&lt;BR /&gt;2024-09-12 08:26:02,186 INFO conf.Configuration: resource-types.xml not found&lt;BR /&gt;2024-09-12 08:26:02,187 INFO resource.ResourceUtils: Unable to find 'resource-types.xml'.&lt;BR 
/&gt;2024-09-12 08:26:02,284 INFO impl.YarnClientImpl: Submitted application application_1725184331906_0019&lt;BR /&gt;2024-09-12 08:26:02,317 INFO mapreduce.Job: The url to track the job: &lt;A href="http://dc1-apache-hbase.mobitel.lk:8088/proxy/application_1725184331906_0019/" target="_blank"&gt;http://dc1-apache-hbase.mobitel.lk:8088/proxy/application_1725184331906_0019/&lt;/A&gt;&lt;BR /&gt;2024-09-12 08:26:02,318 INFO mapreduce.Job: Running job: job_1725184331906_0019&lt;BR /&gt;2024-09-12 08:26:10,443 INFO mapreduce.Job: Job job_1725184331906_0019 running in uber mode : false&lt;BR /&gt;2024-09-12 08:26:10,445 INFO mapreduce.Job: map 0% reduce 0%&lt;BR /&gt;2024-09-12 08:26:20,637 INFO mapreduce.Job: map 60% reduce 0%&lt;BR /&gt;2024-09-12 08:26:27,696 INFO mapreduce.Job: map 100% reduce 0%&lt;BR /&gt;2024-09-12 08:26:29,713 INFO mapreduce.Job: map 100% reduce 100%&lt;BR /&gt;2024-09-12 08:26:31,745 INFO mapreduce.Job: Job job_1725184331906_0019 completed successfully&lt;BR /&gt;2024-09-12 08:26:31,912 INFO mapreduce.Job: Counters: 54&lt;BR /&gt;File System Counters&lt;BR /&gt;FILE: Number of bytes read=67&lt;BR /&gt;FILE: Number of bytes written=3407061&lt;BR /&gt;FILE: Number of read operations=0&lt;BR /&gt;FILE: Number of large read operations=0&lt;BR /&gt;FILE: Number of write operations=0&lt;BR /&gt;HDFS: Number of bytes read=2650&lt;BR /&gt;HDFS: Number of bytes written=215&lt;BR /&gt;HDFS: Number of read operations=45&lt;BR /&gt;HDFS: Number of large read operations=0&lt;BR /&gt;HDFS: Number of write operations=3&lt;BR /&gt;HDFS: Number of bytes read erasure-coded=0&lt;BR /&gt;Job Counters&lt;BR /&gt;Launched map tasks=10&lt;BR /&gt;Launched reduce tasks=1&lt;BR /&gt;Data-local map tasks=10&lt;BR /&gt;Total time spent by all maps in occupied slots (ms)=68883&lt;BR /&gt;Total time spent by all reduces in occupied slots (ms)=6614&lt;BR /&gt;Total time spent by all map tasks (ms)=68883&lt;BR /&gt;Total time spent by all reduce tasks (ms)=6614&lt;BR 
/&gt;Total vcore-milliseconds taken by all map tasks=68883&lt;BR /&gt;Total vcore-milliseconds taken by all reduce tasks=6614&lt;BR /&gt;Total megabyte-milliseconds taken by all map tasks=70536192&lt;BR /&gt;Total megabyte-milliseconds taken by all reduce tasks=6772736&lt;BR /&gt;Map-Reduce Framework&lt;BR /&gt;Map input records=10&lt;BR /&gt;Map output records=20&lt;BR /&gt;Map output bytes=180&lt;BR /&gt;Map output materialized bytes=250&lt;BR /&gt;Input split bytes=1470&lt;BR /&gt;Combine input records=0&lt;BR /&gt;Combine output records=0&lt;BR /&gt;Reduce input groups=2&lt;BR /&gt;Reduce shuffle bytes=250&lt;BR /&gt;Reduce input records=20&lt;BR /&gt;Reduce output records=0&lt;BR /&gt;Spilled Records=40&lt;BR /&gt;Shuffled Maps =10&lt;BR /&gt;Failed Shuffles=0&lt;BR /&gt;Merged Map outputs=10&lt;BR /&gt;GC time elapsed (ms)=1565&lt;BR /&gt;CPU time spent (ms)=6180&lt;BR /&gt;Physical memory (bytes) snapshot=3867213824&lt;BR /&gt;Virtual memory (bytes) snapshot=28280057856&lt;BR /&gt;Total committed heap usage (bytes)=3555196928&lt;BR /&gt;Peak Map Physical memory (bytes)=369606656&lt;BR /&gt;Peak Map Virtual memory (bytes)=2578915328&lt;BR /&gt;Peak Reduce Physical memory (bytes)=284368896&lt;BR /&gt;Peak Reduce Virtual memory (bytes)=2575523840&lt;BR /&gt;Shuffle Errors&lt;BR /&gt;BAD_ID=0&lt;BR /&gt;CONNECTION=0&lt;BR /&gt;IO_ERROR=0&lt;BR /&gt;WRONG_LENGTH=0&lt;BR /&gt;WRONG_MAP=0&lt;BR /&gt;WRONG_REDUCE=0&lt;BR /&gt;File Input Format Counters&lt;BR /&gt;Bytes Read=1180&lt;BR /&gt;File Output Format Counters&lt;BR /&gt;Bytes Written=97&lt;BR /&gt;Job Finished in 31.068 seconds&lt;BR /&gt;Estimated value of Pi is 3.14800000000000000000&lt;/P&gt;</description>
      <pubDate>Thu, 12 Sep 2024 03:02:04 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Suggestions-for-Bulk-Loading-Large-Files-into-HBase-Tables/m-p/393396#M248439</guid>
      <dc:creator>Amandi</dc:creator>
      <dc:date>2024-09-12T03:02:04Z</dc:date>
    </item>
    <item>
      <title>Re: Suggestions for Bulk Loading Large Files into HBase Tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Suggestions-for-Bulk-Loading-Large-Files-into-HBase-Tables/m-p/393658#M248519</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/115535"&gt;@Amandi&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;We need to check the Job logs in order to find out why it is failing -&lt;/P&gt;&lt;P&gt;&lt;A href="http://dc1-apache-hbase.mobitel.lk:8088/proxy/application_1725184331906_0018/" target="_blank" rel="nofollow noopener noreferrer"&gt;http://dc1-apache-hbase.mobitel.lk:8088/proxy/application_1725184331906_0018/&lt;/A&gt;&lt;/P&gt;&lt;P&gt;Also, we can check Resource Manager logs to check if there is any issue with permissions or launching containers.&lt;/P&gt;</description>
      <pubDate>Wed, 18 Sep 2024 19:27:54 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Suggestions-for-Bulk-Loading-Large-Files-into-HBase-Tables/m-p/393658#M248519</guid>
      <dc:creator>shubham_sharma</dc:creator>
      <dc:date>2024-09-18T19:27:54Z</dc:date>
    </item>
    <item>
      <title>Re: Suggestions for Bulk Loading Large Files into HBase Tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Suggestions-for-Bulk-Loading-Large-Files-into-HBase-Tables/m-p/394187#M248676</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/115535"&gt;@Amandi&lt;/a&gt;,&amp;nbsp;Did the response help resolve your query? If it did, kindly mark the relevant reply as the solution, as it will aid others in locating the answer more easily in the future.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 30 Sep 2024 05:52:16 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Suggestions-for-Bulk-Loading-Large-Files-into-HBase-Tables/m-p/394187#M248676</guid>
      <dc:creator>VidyaSargur</dc:creator>
      <dc:date>2024-09-30T05:52:16Z</dc:date>
    </item>
  </channel>
</rss>

