<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: How to overwrite an existing output file/dir during execution of MapReduce jobs? in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/How-to-overwrite-an-existing-output-file-dir-during/m-p/214810#M176722</link>
    <description>&lt;P&gt;Below are two ways to delete the output directory (not recommended) in &lt;STRONG&gt;&lt;A href="http://data-flair.training/blogs/hadoop-mapreduce-introduction-tutorial-comprehensive-guide/"&gt;MapReduce&lt;/A&gt;&lt;/STRONG&gt;:&lt;BR /&gt;&lt;STRONG&gt;1) Using the shell:&lt;/STRONG&gt;&lt;BR /&gt;&lt;CODE&gt;bin/hadoop dfs -rmr /path/to/your/output/&lt;/CODE&gt;&lt;BR /&gt;&lt;STRONG&gt;2) Using the Java API:&lt;/STRONG&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;// configuration should contain reference to your namenode
FileSystem fs = FileSystem.get(new Configuration());
// true stands for recursively deleting the folder you gave
fs.delete(new Path("/path/to/your/output"), true);&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;If you want to overwrite the existing output directory instead:&lt;BR /&gt;You need to override the &lt;STRONG&gt;&lt;A href="http://data-flair.training/blogs/outputformat-recordwriter-hadoop-mapreduce/"&gt;Hadoop OutputFormat&lt;/A&gt;&lt;/STRONG&gt; class:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;public class OverwriteOutputDirOutputFile extends TextOutputFormat{

@Override
public void checkOutputSpecs(FileSystem ignored, JobConf job)
throws FileAlreadyExistsException,
InvalidJobConfException, IOException {
// Ensure that the output directory is set and not already there
Path outDir = getOutputPath(job);
if (outDir == null &amp;amp;&amp;amp; job.getNumReduceTasks() != 0) {
throw new InvalidJobConfException("Output directory not set in JobConf.");
}
if (outDir != null) {
FileSystem fs = outDir.getFileSystem(job);
// normalize the output directory
outDir = fs.makeQualified(outDir);
setOutputPath(job, outDir);

// get delegation token for the outDir’s file system
TokenCache.obtainTokensForNamenodes(job.getCredentials(),
new Path[] {outDir}, job);

// check its existence
/* if (fs.exists(outDir)) {
throw new FileAlreadyExistsException("Output directory " + outDir +
" already exists");
}*/
}
}
}&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Sat, 07 Oct 2017 17:17:44 GMT</pubDate>
    <dc:creator>shreyag1207</dc:creator>
    <dc:date>2017-10-07T17:17:44Z</dc:date>
  </channel>
</rss>

