<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Should i need to cleaning up of tmp space in hadoop cluster on weekly basis ? if yes how can i do it? please suggest in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Should-i-need-to-cleaning-up-of-tmp-space-in-hadoop-cluster/m-p/117356#M34097</link>
    <description>&lt;A rel="user" href="https://community.cloudera.com/users/10953/grao9704.html" nodeid="10953"&gt;@sankar rao&lt;/A&gt;&lt;P&gt; you shouldn't wipe the entire /tmp directory; that would indeed affect your current jobs.&lt;/P&gt;&lt;P&gt;There's no built-in way to do that, but you can schedule a cron job that deletes the files/directories older than x days.&lt;/P&gt;&lt;P&gt;You'll find some examples around; here is an easy (dirty but efficient) shell script for cleaning up files only:&lt;/P&gt;&lt;PRE&gt;#!/bin/bash
usage="Usage: dir_diff.sh [days]"

if [ ! "$1" ]
then
  echo $usage
  exit 1
fi

now=$(date +%s)
hadoop fs -ls -R /tmp/ | grep "^-" | while read f; do
  dir_date=`echo $f | awk '{print $6}'`
  difference=$(( ( $now - $(date -d "$dir_date" +%s) ) / (24 * 60 * 60 ) ))

  if [ $difference -gt $1 ]; then
    hdfs dfs -rm -f $(echo $f | awk '{print $NF}');
  fi
done
&lt;/PRE&gt;</description>
    <pubDate>Sat, 09 Jul 2016 00:19:46 GMT</pubDate>
    <dc:creator>ledel</dc:creator>
    <dc:date>2016-07-09T00:19:46Z</dc:date>
    <item>
      <title>Should i need to cleaning up of tmp space in hadoop cluster on weekly basis ? if yes how can i do it? please suggest</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Should-i-need-to-cleaning-up-of-tmp-space-in-hadoop-cluster/m-p/117353#M34094</link>
      <description />
      <pubDate>Thu, 07 Jul 2016 17:13:19 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Should-i-need-to-cleaning-up-of-tmp-space-in-hadoop-cluster/m-p/117353#M34094</guid>
      <dc:creator>g_rao9704</dc:creator>
      <dc:date>2016-07-07T17:13:19Z</dc:date>
    </item>
    <item>
      <title>Re: Should i need to cleaning up of tmp space in hadoop cluster on weekly basis ? if yes how can i do it? please suggest</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Should-i-need-to-cleaning-up-of-tmp-space-in-hadoop-cluster/m-p/117354#M34095</link>
      <description>&lt;P&gt;I'm assuming you are referring to the /tmp/ directory in HDFS. You can use the command below to clean it up and schedule it with cron to run every week.&lt;/P&gt;&lt;PRE&gt;hadoop fs -rm -r /tmp/*&lt;/PRE&gt;</description>
      <pubDate>Fri, 08 Jul 2016 05:24:52 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Should-i-need-to-cleaning-up-of-tmp-space-in-hadoop-cluster/m-p/117354#M34095</guid>
      <dc:creator>rreddy</dc:creator>
      <dc:date>2016-07-08T05:24:52Z</dc:date>
    </item>
    <item>
      <title>Re: Should i need to cleaning up of tmp space in hadoop cluster on weekly basis ? if yes how can i do it? please suggest</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Should-i-need-to-cleaning-up-of-tmp-space-in-hadoop-cluster/m-p/117355#M34096</link>
      <description>&lt;P&gt;@&lt;A href="https://community.hortonworks.com/users/2667/rreddy.html"&gt;Rahul Reddy&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thank you so much Rahul... so if I delete the HDFS /tmp directory, will that not affect my current jobs?&lt;/P&gt;</description>
      <pubDate>Fri, 08 Jul 2016 13:34:08 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Should-i-need-to-cleaning-up-of-tmp-space-in-hadoop-cluster/m-p/117355#M34096</guid>
      <dc:creator>g_rao9704</dc:creator>
      <dc:date>2016-07-08T13:34:08Z</dc:date>
    </item>
    <item>
      <title>Re: Should i need to cleaning up of tmp space in hadoop cluster on weekly basis ? if yes how can i do it? please suggest</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Should-i-need-to-cleaning-up-of-tmp-space-in-hadoop-cluster/m-p/117356#M34097</link>
      <description>&lt;A rel="user" href="https://community.cloudera.com/users/10953/grao9704.html" nodeid="10953"&gt;@sankar rao&lt;/A&gt;&lt;P&gt; you shouldn't wipe the entire /tmp directory; that would indeed affect your current jobs.&lt;/P&gt;&lt;P&gt;There's no built-in way to do that, but you can schedule a cron job that deletes the files/directories older than x days.&lt;/P&gt;&lt;P&gt;You'll find some examples around; here is an easy (dirty but efficient) shell script for cleaning up files only:&lt;/P&gt;&lt;PRE&gt;#!/bin/bash
usage="Usage: dir_diff.sh [days]"

if [ ! "$1" ]
then
  echo $usage
  exit 1
fi

now=$(date +%s)
hadoop fs -ls -R /tmp/ | grep "^-" | while read f; do
  dir_date=`echo $f | awk '{print $6}'`
  difference=$(( ( $now - $(date -d "$dir_date" +%s) ) / (24 * 60 * 60 ) ))

  if [ $difference -gt $1 ]; then
    hdfs dfs -rm -f $(echo $f | awk '{print $NF}');
  fi
done
&lt;/PRE&gt;</description>
      <pubDate>Sat, 09 Jul 2016 00:19:46 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Should-i-need-to-cleaning-up-of-tmp-space-in-hadoop-cluster/m-p/117356#M34097</guid>
      <dc:creator>ledel</dc:creator>
      <dc:date>2016-07-09T00:19:46Z</dc:date>
    </item>
  </channel>
</rss>

