Below are two ways to delete the output directory in MapReduce (not recommended):
1) Using the shell:
bin/hadoop dfs -rmr /path/to/your/output/
2) Using the Java API:
// The Configuration must resolve to the file system served by your NameNode;
// otherwise FileSystem.get() hands back a different file system.
FileSystem fs = FileSystem.get(new Configuration());
// The second argument requests a recursive delete, so the directory and
// everything beneath it are removed.
fs.delete(new Path("/path/to/your/output"), true);
If you want to overwrite the existing output directory instead:
You need to override the checkOutputSpecs method of the Hadoop OutputFormat class:
public class OverwriteOutputDirOutputFile extends TextOutputFormat{
@Override
public void checkOutputSpecs(FileSystem ignored, JobConf job)
throws FileAlreadyExistsException,
InvalidJobConfException, IOException {
// Ensure that the output directory is set and not already there
Path outDir = getOutputPath(job);
if (outDir == null && job.getNumReduceTasks() != 0) {
throw new InvalidJobConfException(”Output directory not set in JobConf.”);
}
if (outDir != null) {
FileSystem fs = outDir.getFileSystem(job);
// normalize the output directory
outDir = fs.makeQualified(outDir);
setOutputPath(job, outDir);
// get delegation token for the outDir’s file system
TokenCache.obtainTokensForNamenodes(job.getCredentials(),
new Path[] {outDir}, job);
// check its existence
/* if (fs.exists(outDir)) {
throw new FileAlreadyExistsException(”Output directory ” + outDir +
” already exists”);
}*/
}
}
}