Created 10-13-2014 07:13 PM
Created 10-26-2014 10:17 AM
Gautam,
You are right. The TreeSet is treated like any other collection object within MapReduce.
The following Mapper code worked for me:
import java.io.IOException;
import java.util.Iterator;
import java.util.TreeSet;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper that emits every distinct word of an input line exactly once.
 *
 * <p>Each line is split on runs of non-word characters; the non-empty tokens
 * are gathered in a {@link TreeSet}, so duplicates within the line collapse
 * and the words are written in their natural (sorted) order. The output value
 * is always {@link NullWritable}.
 */
public class KPWordCountMapper extends Mapper<LongWritable, Text, Text, NullWritable>{
    // Not read or written by map(); retained so the class shape is unchanged.
    int count = 0;

    /**
     * Writes one (word, null) record per distinct word found in the line.
     *
     * @param inputKey key of the input record (not used here)
     * @param inputVal the line of text to tokenize
     * @param context  sink for the emitted records
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    public void map(LongWritable inputKey, Text inputVal, Context context)
            throws IOException, InterruptedException {
        TreeSet<String> uniqueWords = new TreeSet<>();

        // split() can yield an empty leading token when the line starts with
        // a separator, so empty strings are filtered out.
        for (String token : inputVal.toString().split("\\W+")) {
            if (!token.isEmpty()) {
                uniqueWords.add(token);
            }
        }

        for (String word : uniqueWords) {
            context.write(new Text(word), NullWritable.get());
        }
    }
}
Created 10-19-2014 01:41 AM
Created 10-20-2014 02:50 PM
Gautam,
Here is the sample code I tried. I set the number of reducers to 0 to check the map output.
I am getting the errors shown at the end.
import java.io.IOException;
import java.util.Iterator;
import java.util.TreeSet;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper that emits every distinct word of an input line together with the
 * word's length.
 *
 * <p>Each line is split on runs of non-word characters; the non-empty tokens
 * are gathered in a {@link TreeSet}, so duplicates within the line collapse
 * and the words are written in their natural (sorted) order.
 */
public class KPWordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable>{
    // Not read or written by map(); retained so the class shape is unchanged.
    int count = 0;

    /**
     * Writes one (word, word length) record per distinct word found in the line.
     *
     * @param inputKey key of the input record (not used here)
     * @param inputVal the line of text to tokenize
     * @param context  sink for the emitted records
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    public void map(LongWritable inputKey, Text inputVal, Context context)
            throws IOException, InterruptedException {
        TreeSet<String> words = new TreeSet<>();

        // split() can yield an empty leading token when the line starts with
        // a separator, so empty strings are filtered out.
        for (String token : inputVal.toString().split("\\W+")) {
            if (!token.isEmpty()) {
                words.add(token);
            }
        }

        // Each word is read from the set exactly once and used for both key
        // and value. Advancing the iterator twice per hasNext() check pairs a
        // word with the length of the *next* word and throws
        // NoSuchElementException when the set holds an odd number of words.
        for (String word : words) {
            context.write(new Text(word), new IntWritable(word.length()));
        }
    }
}
14/10/21 03:14:12 INFO input.FileInputFormat: Total input paths to process : 5
14/10/21 03:14:12 INFO util.NativeCodeLoader: Loaded the native-hadoop library
14/10/21 03:14:12 WARN snappy.LoadSnappy: Snappy native library not loaded
14/10/21 03:14:12 INFO mapred.JobClient: Running job: job_201410120206_0080
14/10/21 03:14:13 INFO mapred.JobClient: map 0% reduce 0%
14/10/21 03:14:28 INFO mapred.JobClient: Task Id : attempt_201410120206_0080_m_000000_0, Status : FAILED
java.util.NoSuchElementException
at java.util.TreeMap$PrivateEntryIterator.nextEntry(TreeMap.java:1113)
at java.util.TreeMap$KeyIterator.next(TreeMap.java:1169)
at KPWordCountMapper.map(KPWordCountMapper.java:51)
at KPWordCountMapper.map(KPWordCountMapper.java:1)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:764)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:370)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
Any ideas??
Created 10-26-2014 10:17 AM
Gautam,
You are right. The TreeSet is treated like any other collection object within MapReduce.
The following Mapper code worked for me:
import java.io.IOException;
import java.util.Iterator;
import java.util.TreeSet;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Word-deduplicating mapper: for each input line, outputs the set of distinct
 * words it contains as keys, paired with {@link NullWritable} values.
 *
 * <p>Words are the non-empty pieces obtained by splitting the line on runs of
 * non-word characters. They are held in a {@link TreeSet}, which removes
 * repeats and fixes the emission order to the strings' natural ordering.
 */
public class KPWordCountMapper extends Mapper<LongWritable, Text, Text, NullWritable>{
    // Declared but never touched by map(); kept to leave the class unchanged.
    int count = 0;

    /**
     * Tokenizes one line and emits each distinct token once.
     *
     * @param inputKey key of the input record (ignored)
     * @param inputVal line of text to process
     * @param context  receives the (word, null) output records
     * @throws IOException          if the context write fails
     * @throws InterruptedException if the context write is interrupted
     */
    @Override
    public void map(LongWritable inputKey, Text inputVal, Context context)
            throws IOException, InterruptedException {
        String[] tokens = inputVal.toString().split("\\W+");
        TreeSet<String> sortedDistinct = new TreeSet<>();

        for (String candidate : tokens) {
            // Skip the empty token split() produces for a leading separator.
            if (candidate.length() == 0) {
                continue;
            }
            sortedDistinct.add(candidate);
        }

        for (String distinctWord : sortedDistinct) {
            Text outKey = new Text(distinctWord);
            context.write(outKey, NullWritable.get());
        }
    }
}