<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Partition Retention Period not working on Hive 'Managed' tables in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Partition-Retention-Period-not-working-on-Hive-Managed/m-p/351750#M236360</link>
    <description>&lt;P&gt;I am trying to set partition retention times on existing Hive managed tables using the following:&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;ALTER TABLE &amp;lt;table name&amp;gt; SET TBLPROPERTIES ('discover.partitions'='true');&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;ALTER TABLE &amp;lt;table name&amp;gt; SET TBLPROPERTIES ('partition.retention.period'='1d');&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;as stated on the page below. However, I am still able to search partitions older than a day, so it appears not to be working. The page does mention that this is for 'external' tables; can anyone let me know if an 'age off' retention period is possible on managed tables? Am I missing any commands etc.?&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;A href="https://docs.cloudera.com/HDPDocuments/HDP3/HDP-3.1.4/using-hiveql/content/hive-set-partition-retention.html" target="_blank"&gt;https://docs.cloudera.com/HDPDocuments/HDP3/HDP-3.1.4/using-hiveql/content/hive-set-partition-retention.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks in advance&lt;/P&gt;</description>
    <pubDate>Thu, 08 Sep 2022 08:44:02 GMT</pubDate>
    <dc:creator>Griggsy</dc:creator>
    <dc:date>2022-09-08T08:44:02Z</dc:date>
    <item>
      <title>Partition Retention Period not working on Hive 'Managed' tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Partition-Retention-Period-not-working-on-Hive-Managed/m-p/351750#M236360</link>
      <description>&lt;P&gt;I am trying to set partition retention times on existing Hive managed tables using the following:&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;ALTER TABLE &amp;lt;table name&amp;gt; SET TBLPROPERTIES ('discover.partitions'='true');&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;ALTER TABLE &amp;lt;table name&amp;gt; SET TBLPROPERTIES ('partition.retention.period'='1d');&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;as stated on the page below. However, I am still able to search partitions older than a day, so it appears not to be working. The page does mention that this is for 'external' tables; can anyone let me know if an 'age off' retention period is possible on managed tables? Am I missing any commands etc.?&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;A href="https://docs.cloudera.com/HDPDocuments/HDP3/HDP-3.1.4/using-hiveql/content/hive-set-partition-retention.html" target="_blank"&gt;https://docs.cloudera.com/HDPDocuments/HDP3/HDP-3.1.4/using-hiveql/content/hive-set-partition-retention.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks in advance&lt;/P&gt;</description>
      <pubDate>Thu, 08 Sep 2022 08:44:02 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Partition-Retention-Period-not-working-on-Hive-Managed/m-p/351750#M236360</guid>
      <dc:creator>Griggsy</dc:creator>
      <dc:date>2022-09-08T08:44:02Z</dc:date>
    </item>
    <item>
      <title>Re: Partition Retention Period not working on Hive 'Managed' tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Partition-Retention-Period-not-working-on-Hive-Managed/m-p/351815#M236379</link>
      <description>&lt;P&gt;Partition Retention is disabled by default in HDP-3.1.4.x&lt;/P&gt;&lt;P&gt;set&amp;nbsp;&lt;SPAN&gt;metastore.msck.repair.enable.partition.retention=true in ambari -&amp;gt; Hive -&amp;gt; custom hms-site &amp;amp; restart HMS. HMS should automatically take care.&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 09 Sep 2022 03:03:53 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Partition-Retention-Period-not-working-on-Hive-Managed/m-p/351815#M236379</guid>
      <dc:creator>nramanaiah</dc:creator>
      <dc:date>2022-09-09T03:03:53Z</dc:date>
    </item>
    <item>
      <title>Re: Partition Retention Period not working on Hive 'Managed' tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Partition-Retention-Period-not-working-on-Hive-Managed/m-p/352113#M236439</link>
      <description>&lt;P&gt;Thanks for the reply&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/61058"&gt;@nramanaiah&lt;/a&gt;. I seem to be unable to find an option '&lt;SPAN&gt;metastore.msck.repair.enable.partition.retention' does it need to be added as a custom option and if so under which drop down? Thanks&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 13 Sep 2022 07:34:48 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Partition-Retention-Period-not-working-on-Hive-Managed/m-p/352113#M236439</guid>
      <dc:creator>Griggsy</dc:creator>
      <dc:date>2022-09-13T07:34:48Z</dc:date>
    </item>
    <item>
      <title>Re: Partition Retention Period not working on Hive 'Managed' tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Partition-Retention-Period-not-working-on-Hive-Managed/m-p/352168#M236454</link>
      <description>&lt;P&gt;Can you let me know your cluster version ?&lt;/P&gt;&lt;P&gt;Please check AutoPartitionDiscovery thread (PartitionManagementTask) is running in HMS logs.&lt;/P&gt;&lt;P&gt;If it is running, you should be seeing following log line in HMS. Make sure you don't have specific db/table pattern.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;HMS
org.apache.hadoop.hive.metastore.PartitionManagementTask: [Metastore Scheduled Worker 3]: Looking for tables using catalog: hive dbPattern: * tablePattern: * found: 107

HS2/HMS
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker: [HiveServer2-Background-Pool: Thread-123]: Number of partitionsNotInMs=[], partitionsNotOnFs=[], tablesNotInMs=[], tablesNotOnFs=[], expiredPartitions=[]

org.apache.hadoop.hive.metastore.Msck: [HiveServer2-Background-Pool: Thread-123]: hive.default.test_table - #partsNotInMs: 0 #partsNotInFs: 0 #expiredPartitions: 0 lockRequired: true (R: true A: true  true)&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;In CDP cluster managed by CM, set&amp;nbsp;&lt;SPAN&gt;metastore.msck.repair.enable.partition.retention=true at Hive -&amp;gt; Configuration -&amp;gt;&amp;nbsp;Hive Metastore Server Advanced Configuration Snippet (Safety Valve) for hive-site.xml&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;In HDP-3.x managed by Ambari,&amp;nbsp;&lt;SPAN&gt;set&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN&gt;metastore.msck.repair.enable.partition.retention=true in Hive -&amp;gt; custom hms-site&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;Restart applicable services. expiredPartitions should be visible in logs &amp;amp; should be removed by HMS at scheduled interval once they become eligible.&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 13 Sep 2022 18:04:46 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Partition-Retention-Period-not-working-on-Hive-Managed/m-p/352168#M236454</guid>
      <dc:creator>nramanaiah</dc:creator>
      <dc:date>2022-09-13T18:04:46Z</dc:date>
    </item>
    <item>
      <title>Re: Partition Retention Period not working on Hive 'Managed' tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Partition-Retention-Period-not-working-on-Hive-Managed/m-p/352254#M236471</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/61058"&gt;@nramanaiah&lt;/a&gt;&amp;nbsp;thanks very much for the help!&lt;/P&gt;</description>
      <pubDate>Wed, 14 Sep 2022 15:40:39 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Partition-Retention-Period-not-working-on-Hive-Managed/m-p/352254#M236471</guid>
      <dc:creator>Griggsy</dc:creator>
      <dc:date>2022-09-14T15:40:39Z</dc:date>
    </item>
    <item>
      <title>Re: Partition Retention Period not working on Hive 'Managed' tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Partition-Retention-Period-not-working-on-Hive-Managed/m-p/352710#M236581</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/61058"&gt;@nramanaiah&lt;/a&gt;&amp;nbsp;I'm still experiencing some issues with this, I have applied&amp;nbsp;&lt;SPAN&gt;metastore.msck.repair.enable.partition.retention=true&amp;nbsp;and restarted, all looks good as below. I have applied the ALTER table statements to set the retention of 1 day to a test table without error, but when I do a SELECT statement in Beeline I can still see data from last week?&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Griggsy_0-1663674220278.png" style="width: 693px;"&gt;&lt;img src="https://community.cloudera.com/t5/image/serverpage/image-id/35664iBF8DA48CA0A51972/image-dimensions/693x142?v=v2" width="693" height="142" role="button" title="Griggsy_0-1663674220278.png" alt="Griggsy_0-1663674220278.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Griggsy_1-1663674319795.png" style="width: 604px;"&gt;&lt;img src="https://community.cloudera.com/t5/image/serverpage/image-id/35665iB2E842C2EA371DB2/image-dimensions/604x210?v=v2" width="604" height="210" role="button" title="Griggsy_1-1663674319795.png" alt="Griggsy_1-1663674319795.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;Any idea what I'm missing?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 20 Sep 2022 11:46:32 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Partition-Retention-Period-not-working-on-Hive-Managed/m-p/352710#M236581</guid>
      <dc:creator>Griggsy</dc:creator>
      <dc:date>2022-09-20T11:46:32Z</dc:date>
    </item>
    <item>
      <title>Re: Partition Retention Period not working on Hive 'Managed' tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Partition-Retention-Period-not-working-on-Hive-Managed/m-p/353162#M236651</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/77674"&gt;@Griggsy&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Partition discovery &amp;amp; retention apply only to external tables, &amp;amp; data purge is disabled by default for external tables.&lt;/P&gt;&lt;P&gt;i.e., if tblproperties has &lt;STRONG&gt;('discover.partitions'='true' &amp;amp; 'partition.retention.period'='2m' )&lt;/STRONG&gt; without&amp;nbsp;&lt;STRONG&gt;'external.table.purge'='true'&lt;/STRONG&gt;, consecutive iterations will repeatedly add/remove the partition until the underlying HDFS partition folder is removed. I suspect you are seeing the same situation.&lt;/P&gt;&lt;P&gt;e.g.,&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;==&amp;gt; Expired partition is removed from Metadata in Iteration 1 &amp;lt;== 
2022-09-23T02:43:36,786 INFO  [PartitionDiscoveryTask-1]: metastore.Msck (Msck.java:repair(135)) - hive.default.test - #partsNotInMs: 0 #partsNotInFs: 0 #expiredPartitions: 1 lockRequired: true (R: true A: true  true)
2022-09-23T02:43:36,822 INFO  [PartitionDiscoveryTask-1]: metastore.Msck (Msck.java:repair(287)) - Expired partitions: [test:dept=cse]


==&amp;gt; As HDFS partition folder exist, discover.partition added it back in Iteration 2 &amp;lt;==
2022-09-23T02:48:36,818 INFO  [PartitionDiscoveryTask-1]: metastore.HiveMetaStoreChecker (HiveMetaStoreChecker.java:checkMetastore(144)) - Number of partitionsNotInMs=[test:dept=cse], partitionsNotOnFs=[], tablesNotInMs=[], tablesNotOnFs=[], expiredPartitions=[]
2022-09-23T02:48:36,818 INFO  [PartitionDiscoveryTask-1]: metastore.Msck (Msck.java:repair(135)) - hive.default.test - #partsNotInMs: 1 #partsNotInFs: 0 #expiredPartitions: 0 lockRequired: true (R: true A: true  true)

==&amp;gt; Expired partition is removed from Metadata in Iteration 3 &amp;lt;== 
2022-09-23T02:53:36,805 INFO  [PartitionDiscoveryTask-1]: metastore.Msck (Msck.java:repair(135)) - hive.default.test - #partsNotInMs: 0 #partsNotInFs: 0 #expiredPartitions: 1 lockRequired: true (R: true A: true  true)
2022-09-23T02:53:36,837 INFO  [PartitionDiscoveryTask-1]: metastore.Msck (Msck.java:repair(287)) - Expired partitions: [test:dept=cse]&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Please use the following 3 tblproperties accordingly to make it work as you expect.&lt;/P&gt;&lt;P&gt;&amp;nbsp;1)&amp;nbsp;'discover.partitions'='true' -- To discover partitions from the HDFS path, or remove partition metadata from HMS when the HDFS path is removed.&lt;/P&gt;&lt;P&gt;2)&amp;nbsp;'partition.retention.period'='&amp;lt;retention_period&amp;gt;' -- To remove partitions after the retention period.&lt;/P&gt;&lt;P&gt;3)&amp;nbsp;'external.table.purge'='true' -- To remove the HDFS partition folder, so that partition discovery won't add it back to HMS.&lt;/P&gt;&lt;P&gt;PS: To enable global external table purge, set&amp;nbsp;hive.external.table.purge.default=true cluster-wide in Ambari -&amp;gt; Hive -&amp;gt; custom hive-site&lt;/P&gt;</description>
      <pubDate>Fri, 23 Sep 2022 01:36:49 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Partition-Retention-Period-not-working-on-Hive-Managed/m-p/353162#M236651</guid>
      <dc:creator>nramanaiah</dc:creator>
      <dc:date>2022-09-23T01:36:49Z</dc:date>
    </item>
    <item>
      <title>Re: Partition Retention Period not working on Hive 'Managed' tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Partition-Retention-Period-not-working-on-Hive-Managed/m-p/353354#M236697</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/77674"&gt;@Griggsy&lt;/a&gt;&amp;nbsp;Hope above suggestion helped. Let me know if otherwise.&lt;/P&gt;</description>
      <pubDate>Mon, 26 Sep 2022 22:54:38 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Partition-Retention-Period-not-working-on-Hive-Managed/m-p/353354#M236697</guid>
      <dc:creator>nramanaiah</dc:creator>
      <dc:date>2022-09-26T22:54:38Z</dc:date>
    </item>
    <item>
      <title>Re: Partition Retention Period not working on Hive 'Managed' tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Partition-Retention-Period-not-working-on-Hive-Managed/m-p/353394#M236703</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/61058"&gt;@nramanaiah&lt;/a&gt;&amp;nbsp;I haven't had a chance to do further testing yet, I will let you know ASAP. Thanks again for the help.&lt;/P&gt;</description>
      <pubDate>Tue, 27 Sep 2022 08:53:14 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Partition-Retention-Period-not-working-on-Hive-Managed/m-p/353394#M236703</guid>
      <dc:creator>Griggsy</dc:creator>
      <dc:date>2022-09-27T08:53:14Z</dc:date>
    </item>
    <item>
      <title>Re: Partition Retention Period not working on Hive 'Managed' tables</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Partition-Retention-Period-not-working-on-Hive-Managed/m-p/353871#M236818</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/61058"&gt;@nramanaiah&lt;/a&gt;&amp;nbsp;have been able to run further testing and confirm that my partitions are purging as expected! thanks again for the assistance!&lt;/P&gt;</description>
      <pubDate>Mon, 03 Oct 2022 14:16:53 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Partition-Retention-Period-not-working-on-Hive-Managed/m-p/353871#M236818</guid>
      <dc:creator>Griggsy</dc:creator>
      <dc:date>2022-10-03T14:16:53Z</dc:date>
    </item>
  </channel>
</rss>

