<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Any work round to avoid duplicate records in impala for Primary key column in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Any-work-round-to-avoid-duplicate-records-in-impala-for/m-p/57451#M64962</link>
    <description>&lt;P&gt;Impala does not have a concept of a PK. However, you have two options.&amp;nbsp;&lt;/P&gt;&lt;P&gt;Down the road, if you want to delete a single row, you can't perform that on Hive / Impala, so you can implement it using the&amp;nbsp;Impala-Kudu format. With the Kudu format you can create a table with a primary key, plus you can perform single-row deletes.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Or the hard way to achieve this is to&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;STRONG&gt;STEP 1&lt;/STRONG&gt;&lt;BR /&gt;&lt;BR /&gt;CREATE TABLE Sample
(
    name STRING,
    street  STRING,
    RD123      Timestamp  -- assume this is unique since we don't have a PK
    
)

&lt;STRONG&gt;STEP 2&lt;BR /&gt;&lt;/STRONG&gt; 
Perform the LOAD DATA INTO Sample
&lt;BR /&gt;&lt;STRONG&gt;STEP 3 - Create another table &lt;/STRONG&gt;&lt;BR /&gt;
Create table sample_no_dupli AS SELECT name,street,MAX(RD123) AS createdate FROM Sample
GROUP BY name,street&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Sun, 16 Jul 2017 03:03:07 GMT</pubDate>
    <dc:creator>csguna</dc:creator>
    <dc:date>2017-07-16T03:03:07Z</dc:date>
    <item>
      <title>Any work round to avoid duplicate records in impala for Primary key column</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Any-work-round-to-avoid-duplicate-records-in-impala-for/m-p/57431#M64958</link>
      <description>&lt;P&gt;I would appreciate any workaround to avoid duplicate records in Impala for a primary key column.&lt;/P&gt;</description>
      <pubDate>Fri, 16 Sep 2022 11:55:54 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Any-work-round-to-avoid-duplicate-records-in-impala-for/m-p/57431#M64958</guid>
      <dc:creator>Msdhan</dc:creator>
      <dc:date>2022-09-16T11:55:54Z</dc:date>
    </item>
    <item>
      <title>Re: Any work round to avoid duplicate records in impala for Primary key column</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Any-work-round-to-avoid-duplicate-records-in-impala-for/m-p/57435#M64959</link>
      <description>&lt;P&gt;Are you asking about insertion or retrieval of data?&lt;/P&gt;</description>
      <pubDate>Sat, 15 Jul 2017 06:10:11 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Any-work-round-to-avoid-duplicate-records-in-impala-for/m-p/57435#M64959</guid>
      <dc:creator>csguna</dc:creator>
      <dc:date>2017-07-15T06:10:11Z</dc:date>
    </item>
    <item>
      <title>Re: Any work round to avoid duplicate records in impala for Primary key column</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Any-work-round-to-avoid-duplicate-records-in-impala-for/m-p/57437#M64960</link>
      <description>&lt;P&gt;I am thinking of avoiding duplicates during insertion, if this won't cause a performance issue.&lt;/P&gt;</description>
      <pubDate>Sat, 15 Jul 2017 06:42:17 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Any-work-round-to-avoid-duplicate-records-in-impala-for/m-p/57437#M64960</guid>
      <dc:creator>Msdhan</dc:creator>
      <dc:date>2017-07-15T06:42:17Z</dc:date>
    </item>
    <item>
      <title>Re: Any work round to avoid duplicate records in impala for Primary key column</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Any-work-round-to-avoid-duplicate-records-in-impala-for/m-p/57448#M64961</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/22384"&gt;@Msdhan&lt;/a&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;A href="https://www.cloudera.com/documentation/enterprise/5-3-x/topics/impala_porting.html" target="_blank"&gt;https://www.cloudera.com/documentation/enterprise/5-3-x/topics/impala_porting.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;According to the above link,&amp;nbsp;&lt;SPAN&gt;Take out any&amp;nbsp;&lt;/SPAN&gt;CREATE INDEX&lt;SPAN&gt;,&amp;nbsp;&lt;/SPAN&gt;DROP INDEX&lt;SPAN&gt;, and&amp;nbsp;&lt;/SPAN&gt;ALTER INDEX&lt;SPAN&gt;&amp;nbsp;statements, and equivalent&amp;nbsp;&lt;/SPAN&gt;ALTER TABLE&lt;SPAN&gt;statements. Remove any&amp;nbsp;&lt;/SPAN&gt;INDEX&lt;SPAN&gt;,&amp;nbsp;&lt;/SPAN&gt;KEY&lt;SPAN&gt;, or&amp;nbsp;&lt;/SPAN&gt;PRIMARY KEY&lt;SPAN&gt;&amp;nbsp;clauses from&amp;nbsp;&lt;/SPAN&gt;CREATE TABLE&lt;SPAN&gt;&amp;nbsp;and&amp;nbsp;&lt;/SPAN&gt;ALTER TABLE&lt;SPAN&gt;&amp;nbsp;statements. Impala is optimized for bulk read operations for data warehouse-style queries, and therefore does not support indexes for its tables.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Yes in general, you cannot achieve both Performance and Indexing. So if possible, you can try&amp;nbsp;to control duplicate in the source (select) portion instead of target (insert)&amp;nbsp;portion&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Ex:&lt;/P&gt;&lt;P&gt;insert into table trg_table&amp;nbsp;&lt;/P&gt;&lt;P&gt;select * from src_table&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 15 Jul 2017 23:42:25 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Any-work-round-to-avoid-duplicate-records-in-impala-for/m-p/57448#M64961</guid>
      <dc:creator>saranvisa</dc:creator>
      <dc:date>2017-07-15T23:42:25Z</dc:date>
    </item>
    <item>
      <title>Re: Any work round to avoid duplicate records in impala for Primary key column</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Any-work-round-to-avoid-duplicate-records-in-impala-for/m-p/57451#M64962</link>
      <description>&lt;P&gt;Impala does not have a concept of a PK. However, you have two options.&amp;nbsp;&lt;/P&gt;&lt;P&gt;Down the road, if you want to delete a single row, you can't perform that on Hive / Impala, so you can implement it using the&amp;nbsp;Impala-Kudu format. With the Kudu format you can create a table with a primary key, plus you can perform single-row deletes.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Or the hard way to achieve this is to&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;STRONG&gt;STEP 1&lt;/STRONG&gt;&lt;BR /&gt;&lt;BR /&gt;CREATE TABLE Sample
(
    name STRING,
    street  STRING,
    RD123      Timestamp  -- assume this is unique since we don't have a PK
    
)

&lt;STRONG&gt;STEP 2&lt;BR /&gt;&lt;/STRONG&gt; 
Perform the LOAD DATA INTO Sample
&lt;BR /&gt;&lt;STRONG&gt;STEP 3 - Create another table &lt;/STRONG&gt;&lt;BR /&gt;
Create table sample_no_dupli AS SELECT name,street,MAX(RD123) AS createdate FROM Sample
GROUP BY name,street&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 16 Jul 2017 03:03:07 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Any-work-round-to-avoid-duplicate-records-in-impala-for/m-p/57451#M64962</guid>
      <dc:creator>csguna</dc:creator>
      <dc:date>2017-07-16T03:03:07Z</dc:date>
    </item>
    <item>
      <title>Re: Any work round to avoid duplicate records in impala for Primary key column</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Any-work-round-to-avoid-duplicate-records-in-impala-for/m-p/57454#M64963</link>
      <description>Thanks Saranvisa for this explanation</description>
      <pubDate>Sun, 16 Jul 2017 03:58:11 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Any-work-round-to-avoid-duplicate-records-in-impala-for/m-p/57454#M64963</guid>
      <dc:creator>Msdhan</dc:creator>
      <dc:date>2017-07-16T03:58:11Z</dc:date>
    </item>
    <item>
      <title>Re: Any work round to avoid duplicate records in impala for Primary key column</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Any-work-round-to-avoid-duplicate-records-in-impala-for/m-p/57455#M64964</link>
      <description>&lt;P&gt;&lt;SPAN&gt;csguna, appreciate your&amp;nbsp;inputs. will try this.&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Sun, 16 Jul 2017 04:01:46 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Any-work-round-to-avoid-duplicate-records-in-impala-for/m-p/57455#M64964</guid>
      <dc:creator>Msdhan</dc:creator>
      <dc:date>2017-07-16T04:01:46Z</dc:date>
    </item>
    <item>
      <title>Re: Any work round to avoid duplicate records in impala for Primary key column</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Any-work-round-to-avoid-duplicate-records-in-impala-for/m-p/57456#M64965</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/22384"&gt;@Msdhan&lt;/a&gt;&amp;nbsp;You're welcome :))&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 16 Jul 2017 04:38:54 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Any-work-round-to-avoid-duplicate-records-in-impala-for/m-p/57456#M64965</guid>
      <dc:creator>csguna</dc:creator>
      <dc:date>2017-07-16T04:38:54Z</dc:date>
    </item>
  </channel>
</rss>

