<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Apache kudu in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-kudu/m-p/69617#M80303</link>
    <description>&lt;P&gt;Another option is to write a Spark job that uses multiple tasks to read from Oracle and write to Kudu in parallel, or something equivalent using multiple processes or threads.&lt;/P&gt;</description>
    <pubDate>Thu, 05 Jul 2018 22:35:36 GMT</pubDate>
    <dc:creator>mpercy</dc:creator>
    <dc:date>2018-07-05T22:35:36Z</dc:date>
    <item>
      <title>Apache kudu</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-kudu/m-p/69597#M80299</link>
      <description>&lt;P&gt;Hi, I am working on Kudu and Oracle. I have more than 5 million records and I have been asked to read them from Oracle and write them into a Kudu table. What I did, as one approach, was open an ojdbc connection, fetch the records from Oracle, and insert them into the Kudu table using PartialRow and the insert method. I just want to know if I can do bulk inserts to reduce the time spent on writes.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 16 Sep 2022 15:50:03 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-kudu/m-p/69597#M80299</guid>
      <dc:creator>HJ</dc:creator>
      <dc:date>2022-09-16T15:50:03Z</dc:date>
    </item>
    <item>
      <title>Re: Apache kudu</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-kudu/m-p/69600#M80300</link>
      <description>&lt;P&gt;Can I do a bulk insert? If so, please tell me how.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 05 Jul 2018 16:44:35 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-kudu/m-p/69600#M80300</guid>
      <dc:creator>HJ</dc:creator>
      <dc:date>2018-07-05T16:44:35Z</dc:date>
    </item>
    <item>
      <title>Re: Apache kudu</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-kudu/m-p/69604#M80301</link>
      <description>&lt;P&gt;If I do the writes as per the program given in&amp;nbsp;&lt;A href="https://github.com/cloudera/kudu-examples/tree/master/java/java-sample/src/main/java/org/kududb/examples/sample" target="_blank"&gt;https://github.com/cloudera/kudu-examples/tree/master/java/java-sample/src/main/java/org/kududb/examples/sample&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;it takes an hour to insert the data into the Kudu table.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;How can I insert the records in less time?&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 05 Jul 2018 17:18:45 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-kudu/m-p/69604#M80301</guid>
      <dc:creator>HJ</dc:creator>
      <dc:date>2018-07-05T17:18:45Z</dc:date>
    </item>
    <item>
      <title>Re: Apache kudu</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-kudu/m-p/69616#M80302</link>
      <description>&lt;P&gt;One option is to export to Parquet on HDFS using Sqoop, then use Impala to&amp;nbsp;CREATE TABLE AS SELECT * FROM your parquet table into your Kudu table.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Unfortunately Sqoop does not have support for Kudu at this time.&lt;/P&gt;</description>
      <pubDate>Thu, 05 Jul 2018 22:27:08 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-kudu/m-p/69616#M80302</guid>
      <dc:creator>mpercy</dc:creator>
      <dc:date>2018-07-05T22:27:08Z</dc:date>
    </item>
    <item>
      <title>Re: Apache kudu</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-kudu/m-p/69617#M80303</link>
      <description>&lt;P&gt;Another option is to write a Spark job that uses multiple tasks to read from Oracle and write to Kudu in parallel, or something equivalent using multiple processes or threads.&lt;/P&gt;</description>
      <pubDate>Thu, 05 Jul 2018 22:35:36 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-kudu/m-p/69617#M80303</guid>
      <dc:creator>mpercy</dc:creator>
      <dc:date>2018-07-05T22:35:36Z</dc:date>
    </item>
    <item>
      <title>Re: Apache kudu</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-kudu/m-p/69814#M80304</link>
      <description>&lt;P&gt;Thank you, but I just want to use Java and do batch inserts. Is there any way to perform faster writes on a Kudu table using Java?&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 10 Jul 2018 20:48:04 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-kudu/m-p/69814#M80304</guid>
      <dc:creator>HJ</dc:creator>
      <dc:date>2018-07-10T20:48:04Z</dc:date>
    </item>
    <item>
      <title>Re: Apache kudu</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-kudu/m-p/69821#M80305</link>
      <description>&lt;P&gt;Are you sure the bottleneck is Kudu? Maybe the bottleneck is reading from Oracle?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Using the Kudu&amp;nbsp;AUTO_FLUSH_BACKGROUND mode should give pretty fast throughput when writing. See&amp;nbsp;&lt;A href="https://kudu.apache.org/apidocs/org/apache/kudu/client/SessionConfiguration.FlushMode.html" target="_blank"&gt;https://kudu.apache.org/apidocs/org/apache/kudu/client/SessionConfiguration.FlushMode.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;You can also try increasing the KuduSession.setMutationBufferSpace() value, also consider your partitioning scheme.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;If you want more parallelism you can also consider scanning different ranges in Oracle with different processes or threads on the same or different client machine and perform more parallelized writes to Kudu.&lt;/P&gt;</description>
      <pubDate>Tue, 10 Jul 2018 22:00:08 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-kudu/m-p/69821#M80305</guid>
      <dc:creator>mpercy</dc:creator>
      <dc:date>2018-07-10T22:00:08Z</dc:date>
    </item>
    <item>
      <title>Re: Apache kudu</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-kudu/m-p/90894#M80306</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I need to load data from Hive into a Kudu table using PySpark code.&amp;nbsp; I am able to insert one record using table.new_insert but am not able to load all the records at once. What I am looking for is a way to get the data into a DataFrame and load that DataFrame into the Kudu table.&amp;nbsp; I found an example using Java but not with Python.&amp;nbsp; Will you please help?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thx.&lt;/P&gt;</description>
      <pubDate>Fri, 24 May 2019 20:27:46 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-kudu/m-p/90894#M80306</guid>
      <dc:creator>SantoshT</dc:creator>
      <dc:date>2019-05-24T20:27:46Z</dc:date>
    </item>
    <item>
      <title>Re: Apache kudu</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-kudu/m-p/91085#M80307</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I don't know much about Kudu+PySpark except that there is a lot of room for improvement there, but maybe a couple of examples in the following patch-in-flight could be useful: &lt;A href="https://gerrit.cloudera.org/#/c/13102/" target="_blank"&gt;https://gerrit.cloudera.org/#/c/13102/&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 30 May 2019 16:29:32 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-kudu/m-p/91085#M80307</guid>
      <dc:creator>Alexey1c</dc:creator>
      <dc:date>2019-05-30T16:29:32Z</dc:date>
    </item>
    <item>
      <title>Re: Apache kudu</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-kudu/m-p/286993#M80308</link>
      <description>&lt;P&gt;I am able to Sqoop the data from Oracle to HDFS and then do a CREATE TABLE AS SELECT * FROM in Impala to write into Kudu. I am able to run the queries manually here, but what is the best way to&amp;nbsp; automate this when I move the code to production?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 07 Jan 2020 08:35:32 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-kudu/m-p/286993#M80308</guid>
      <dc:creator>raghu466744</dc:creator>
      <dc:date>2020-01-07T08:35:32Z</dc:date>
    </item>
  </channel>
</rss>

