<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: How to do a cleanup of hdfs files older than a certain date using a bash script in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-do-a-cleanup-of-hdfs-files-older-than-a-certain-date/m-p/182147#M58615</link>
    <description>&lt;P&gt;Below post has one example script which deletes files older than certain days:&lt;/P&gt;&lt;P&gt;&lt;A href="https://community.hortonworks.com/questions/19204/do-we-have-any-script-which-we-can-use-to-clean-tm.html" target="_blank"&gt;https://community.hortonworks.com/questions/19204/do-we-have-any-script-which-we-can-use-to-clean-tm.html&lt;/A&gt;
&lt;/P&gt;&lt;PRE&gt;#!/bin/bash
usage="Usage: dir_diff.sh [days]"
if [ ! "$1" ]
then
echo $usage
exit 1
fi
now=$(date +%s)
hadoop fs -ls /zone_encr2/ | grep "^d" | while read f; do
dir_date=`echo $f | awk '{print $6}'`
difference=$(( ( $now - $(date -d "$dir_date" +%s) ) / (24 * 60 * 60 ) ))
if [ $difference -gt $1 ]; then
hadoop fs -ls `echo $f | awk '{ print $8 }'`;
fi
done
&lt;/PRE&gt;</description>
    <pubDate>Fri, 31 Mar 2017 04:26:08 GMT</pubDate>
    <dc:creator>namaheshwari</dc:creator>
    <dc:date>2017-03-31T04:26:08Z</dc:date>
    <item>
      <title>How to do a cleanup of hdfs files older than a certain date using a bash script</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-do-a-cleanup-of-hdfs-files-older-than-a-certain-date/m-p/182146#M58614</link>
      <description>&lt;P&gt;How to do a cleanup of hdfs files older than a certain date using a bash script.&lt;/P&gt;&lt;P&gt;I am just looking for a general strategy.&lt;/P&gt;</description>
      <pubDate>Fri, 31 Mar 2017 04:07:10 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-do-a-cleanup-of-hdfs-files-older-than-a-certain-date/m-p/182146#M58614</guid>
      <dc:creator>Former Member</dc:creator>
      <dc:date>2017-03-31T04:07:10Z</dc:date>
    </item>
    <item>
      <title>Re: How to do a cleanup of hdfs files older than a certain date using a bash script</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-do-a-cleanup-of-hdfs-files-older-than-a-certain-date/m-p/182147#M58615</link>
      <description>&lt;P&gt;Below post has one example script which deletes files older than certain days:&lt;/P&gt;&lt;P&gt;&lt;A href="https://community.hortonworks.com/questions/19204/do-we-have-any-script-which-we-can-use-to-clean-tm.html" target="_blank"&gt;https://community.hortonworks.com/questions/19204/do-we-have-any-script-which-we-can-use-to-clean-tm.html&lt;/A&gt;
&lt;/P&gt;&lt;PRE&gt;#!/bin/bash
usage="Usage: dir_diff.sh [days]"
if [ ! "$1" ]
then
echo $usage
exit 1
fi
now=$(date +%s)
hadoop fs -ls /zone_encr2/ | grep "^d" | while read f; do
dir_date=`echo $f | awk '{print $6}'`
difference=$(( ( $now - $(date -d "$dir_date" +%s) ) / (24 * 60 * 60 ) ))
if [ $difference -gt $1 ]; then
hadoop fs -ls `echo $f | awk '{ print $8 }'`;
fi
done
&lt;/PRE&gt;</description>
      <pubDate>Fri, 31 Mar 2017 04:26:08 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-do-a-cleanup-of-hdfs-files-older-than-a-certain-date/m-p/182147#M58615</guid>
      <dc:creator>namaheshwari</dc:creator>
      <dc:date>2017-03-31T04:26:08Z</dc:date>
    </item>
    <item>
      <title>Re: How to do a cleanup of hdfs files older than a certain date using a bash script</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-do-a-cleanup-of-hdfs-files-older-than-a-certain-date/m-p/353527#M58616</link>
      <description>&lt;P&gt;The script in the accepted solution was not working for me, so I modified it:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="bash"&gt;#!/bin/bash
usage="Usage: dir_diff.sh [path] [-gt|-lt] [days]"
if (( $# &amp;lt; 3 ))
  then
  echo $usage
  exit 1
fi
now=$(date +%s)
hdfs dfs -ls $1 | grep -v "^d" | grep -v '^Found ' | while read f; do
  dir_date=`echo $f | awk '{print $6}'`
  difference=$(( ( $now - $(date -d "$dir_date" +%s) ) / (24 * 60 * 60 ) ))
  if [ $difference $2 $3 ]; then
    echo $f
    # hdfs dfs -ls `echo $f| awk '{ print $8 }'`;
  fi
done&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 28 Sep 2022 14:05:06 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-do-a-cleanup-of-hdfs-files-older-than-a-certain-date/m-p/353527#M58616</guid>
      <dc:creator>cesar_rodriguez</dc:creator>
      <dc:date>2022-09-28T14:05:06Z</dc:date>
    </item>
  </channel>
</rss>

