<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Retrieve HDFS files after a specific time stamp in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Retrive-hdfs-files-after-a-specific-time-stamp/m-p/93744#M57354</link>
    <description>&lt;P&gt;You can use a script like this to create snapshots of old and new files, i.e. to search for files which are older than 3 days and for files which are newer than 3 days. Just make sure you use the correct path to the Cloudera jars. In the case of CDH 5.15:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;#!/bin/bash
now=`date +"%Y-%m-%dT%H:%M:%S"`
hdfs dfs -rm /data/cleanup_report/part=older3days/*
hdfs dfs -rm /data/cleanup_report/part=newer3days/*


hadoop jar /opt/cloudera/parcels/CDH/jars/search-mr-1.0.0-cdh5.15.1.jar org.apache.solr.hadoop.HdfsFindTool -find /data -type d -mtime +3 | sed "s/^/${now}\tolder3days\t/" | hadoop fs -put - /data/cleanup_report/part=older3days/data.csv


hadoop jar /opt/cloudera/parcels/CDH/jars/search-mr-1.0.0-cdh5.15.1.jar org.apache.solr.hadoop.HdfsFindTool -find /data -type d -mtime -3 | sed "s/^/${now}\tnewer3days\t/" | hadoop fs -put - /data/cleanup_report/part=newer3days/data.csv&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Then create an external table with partitions on top of this HDFS folder.&lt;/P&gt;</description>
    <pubDate>Tue, 06 Aug 2019 15:23:40 GMT</pubDate>
    <dc:creator>Tomas79</dc:creator>
    <dc:date>2019-08-06T15:23:40Z</dc:date>
    <item>
      <title>Retrieve HDFS files after a specific time stamp</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Retrive-hdfs-files-after-a-specific-time-stamp/m-p/93685#M57319</link>
      <description>&lt;P&gt;Hi All&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I am trying to achieve functionality similar to the Unix command below at the HDFS level.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;find /temp -name '*.avro' -cnewer sample.avro&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;or to retrieve the files newer than a specific timestamp at the HDFS level.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;From the Hadoop documentation I learned that&amp;nbsp;the built-in find command offers only limited functionality:&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;A href="https://hadoop.apache.org/docs/r2.7.1/hadoop-project-dist/hadoop-common/FileSystemShell.html#find" target="_blank" rel="noopener"&gt;https://hadoop.apache.org/docs/r2.7.1/hadoop-project-dist/hadoop-common/FileSystemShell.html#find&lt;/A&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Please let me know how this can be achieved at the Hadoop level, or whether there are any workarounds.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thanks - Muthu&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 05 Aug 2019 09:10:27 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Retrive-hdfs-files-after-a-specific-time-stamp/m-p/93685#M57319</guid>
      <dc:creator>smkmuthu</dc:creator>
      <dc:date>2019-08-05T09:10:27Z</dc:date>
    </item>
    <item>
      <title>Re: Retrieve HDFS files after a specific time stamp</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Retrive-hdfs-files-after-a-specific-time-stamp/m-p/93713#M57332</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/35353"&gt;@smkmuthu&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;If you are using the CDH distribution, you can use HdfsFindTool to accomplish this.&amp;nbsp;&lt;/P&gt;&lt;P&gt;Sample command to find files modified within the last 3 days (relative to now) in the directory "/user/hive":&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;hadoop jar /opt/cloudera/parcels/CDH/jars/search-mr-1.0.0-cdh5.15.1.jar org.apache.solr.hadoop.HdfsFindTool -find /user/hive -type f -mtime -3&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P class="p1"&gt;&lt;SPAN class="s1"&gt;Please modify the /opt/cloudera/parcels path in the command as per the version of CDH you are using and the target directory as per the requirement. More details about HdfsFindTool can be found &lt;A href="https://www.cloudera.com/documentation/enterprise/5-15-x/topics/search_hdfsfindtool.html" target="_blank" rel="noopener"&gt;HERE&lt;/A&gt;.&lt;/SPAN&gt;&lt;/P&gt;&lt;P class="p1"&gt;&amp;nbsp;&lt;/P&gt;&lt;P class="p1"&gt;&lt;SPAN class="s1"&gt;Hope it helps!&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 05 Aug 2019 13:38:54 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Retrive-hdfs-files-after-a-specific-time-stamp/m-p/93713#M57332</guid>
      <dc:creator>Gomathinayagam</dc:creator>
      <dc:date>2019-08-05T13:38:54Z</dc:date>
    </item>
    <item>
      <title>Re: Retrieve HDFS files after a specific time stamp</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Retrive-hdfs-files-after-a-specific-time-stamp/m-p/93744#M57354</link>
      <description>&lt;P&gt;You can use a script like this to create snapshots of old and new files, i.e. to search for files which are older than 3 days and for files which are newer than 3 days. Just make sure you use the correct path to the Cloudera jars. In the case of CDH 5.15:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;#!/bin/bash
now=`date +"%Y-%m-%dT%H:%M:%S"`
hdfs dfs -rm /data/cleanup_report/part=older3days/*
hdfs dfs -rm /data/cleanup_report/part=newer3days/*


hadoop jar /opt/cloudera/parcels/CDH/jars/search-mr-1.0.0-cdh5.15.1.jar org.apache.solr.hadoop.HdfsFindTool -find /data -type d -mtime +3 | sed "s/^/${now}\tolder3days\t/" | hadoop fs -put - /data/cleanup_report/part=older3days/data.csv


hadoop jar /opt/cloudera/parcels/CDH/jars/search-mr-1.0.0-cdh5.15.1.jar org.apache.solr.hadoop.HdfsFindTool -find /data -type d -mtime -3 | sed "s/^/${now}\tnewer3days\t/" | hadoop fs -put - /data/cleanup_report/part=newer3days/data.csv&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Then create an external table with partitions on top of this HDFS folder.&lt;/P&gt;</description>
      <pubDate>Tue, 06 Aug 2019 15:23:40 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Retrive-hdfs-files-after-a-specific-time-stamp/m-p/93744#M57354</guid>
      <dc:creator>Tomas79</dc:creator>
      <dc:date>2019-08-06T15:23:40Z</dc:date>
    </item>
  </channel>
</rss>

