<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Is it possible to use a Treeset collection object inside the mapper? in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-it-possible-to-use-a-Treeset-collection-object-inside-the/m-p/20564#M3279</link>
    <description>&lt;P&gt;Gautam,&lt;/P&gt;&lt;P&gt;Here is the sample code I tried. I set the number of reducers to 0 to check the map output.&lt;/P&gt;&lt;P&gt;I am getting the errors shown at the end.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;import java.util.Iterator;&lt;BR /&gt;import java.util.TreeSet;&lt;BR /&gt;import org.apache.hadoop.io.IntWritable;&lt;BR /&gt;import org.apache.hadoop.io.LongWritable;&lt;BR /&gt;import org.apache.hadoop.io.Text;&lt;BR /&gt;import org.apache.hadoop.mapreduce.Mapper;&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;public class KPWordCountMapper extends Mapper&amp;lt;LongWritable, Text, Text, IntWritable&amp;gt;{&lt;BR /&gt;&lt;BR /&gt;int count = 0;&lt;BR /&gt;&lt;BR /&gt;@Override&lt;BR /&gt;public void map(LongWritable inputKey,Text inputVal,Context context) throws IOException,InterruptedException&lt;BR /&gt;{&lt;BR /&gt;TreeSet&amp;lt;String&amp;gt; ts = new TreeSet&amp;lt;&amp;gt;();&lt;BR /&gt;&lt;BR /&gt;String line = inputVal.toString();&lt;BR /&gt;&lt;BR /&gt;String[] splits = line.split("\\W+");&lt;BR /&gt;&lt;BR /&gt;for(String outputKey:splits)&lt;BR /&gt;&lt;BR /&gt;if(outputKey.length() &amp;gt; 0){&lt;BR /&gt;ts.add(outputKey);&lt;BR /&gt;}&lt;BR /&gt;&lt;BR /&gt;Iterator&amp;lt;String&amp;gt; itr= ts.iterator();&lt;BR /&gt;while(itr.hasNext()){&lt;BR /&gt;//System.out.println(itr.next());&lt;BR /&gt;context.write(new Text(itr.next()),new IntWritable(itr.next().length()));&lt;BR /&gt;}&lt;BR /&gt;}&lt;BR /&gt;}&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;14/10/21 03:14:12 INFO input.FileInputFormat: Total input paths to process : 5&lt;BR /&gt;14/10/21 03:14:12 INFO util.NativeCodeLoader: Loaded the native-hadoop library&lt;BR /&gt;14/10/21 03:14:12 WARN snappy.LoadSnappy: Snappy native library not loaded&lt;BR /&gt;14/10/21 03:14:12 INFO mapred.JobClient: Running job: job_201410120206_0080&lt;BR /&gt;14/10/21 03:14:13 INFO mapred.JobClient: map 0% reduce 0%&lt;BR /&gt;14/10/21 03:14:28 INFO 
mapred.JobClient: Task Id : attempt_201410120206_0080_m_000000_0, Status : FAILED&lt;BR /&gt;java.util.NoSuchElementException&lt;BR /&gt;at java.util.TreeMap$PrivateEntryIterator.nextEntry(TreeMap.java:1113)&lt;BR /&gt;at java.util.TreeMap$KeyIterator.next(TreeMap.java:1169)&lt;BR /&gt;at KPWordCountMapper.map(KPWordCountMapper.java:51)&lt;BR /&gt;at KPWordCountMapper.map(KPWordCountMapper.java:1)&lt;BR /&gt;at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)&lt;BR /&gt;at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:764)&lt;BR /&gt;at org.apache.hadoop.mapred.MapTask.run(MapTask.java:370)&lt;BR /&gt;at org.apache.hadoop.mapred.Child$4.run(Child.java:255)&lt;BR /&gt;at java.security.AccessController.doPrivileged(Native Method)&lt;BR /&gt;at javax.security.auth.Subject.doAs(Subject.java:415)&lt;BR /&gt;at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)&lt;BR /&gt;at org.apache.hadoop.mapred.Child.main(Child.java:249)&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Any ideas??&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Mon, 20 Oct 2014 21:50:32 GMT</pubDate>
    <dc:creator>Skill_Fresh</dc:creator>
    <dc:date>2014-10-20T21:50:32Z</dc:date>
    <item>
      <title>Is it possible to use a Treeset collection object inside the mapper?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-it-possible-to-use-a-Treeset-collection-object-inside-the/m-p/20146#M3277</link>
      <description />
      <pubDate>Tue, 14 Oct 2014 02:13:18 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-it-possible-to-use-a-Treeset-collection-object-inside-the/m-p/20146#M3277</guid>
      <dc:creator>Skill_Fresh</dc:creator>
      <dc:date>2014-10-14T02:13:18Z</dc:date>
    </item>
    <item>
      <title>Re: Is it possible to use a Treeset collection object inside the mapper?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-it-possible-to-use-a-Treeset-collection-object-inside-the/m-p/20458#M3278</link>
      <description>It should be possible to use any of the Java collection classes in your&lt;BR /&gt;application. Have you tried it already and are you facing problems?&lt;BR /&gt;&lt;BR /&gt;</description>
      <pubDate>Sun, 19 Oct 2014 08:41:15 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-it-possible-to-use-a-Treeset-collection-object-inside-the/m-p/20458#M3278</guid>
      <dc:creator>GautamG</dc:creator>
      <dc:date>2014-10-19T08:41:15Z</dc:date>
    </item>
    <item>
      <title>Re: Is it possible to use a Treeset collection object inside the mapper?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-it-possible-to-use-a-Treeset-collection-object-inside-the/m-p/20564#M3279</link>
      <description>&lt;P&gt;Gautam,&lt;/P&gt;&lt;P&gt;Here is the sample code I tried. I set the number of reducers to 0 to check the map output.&lt;/P&gt;&lt;P&gt;I am getting the errors shown at the end.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;import java.util.Iterator;&lt;BR /&gt;import java.util.TreeSet;&lt;BR /&gt;import org.apache.hadoop.io.IntWritable;&lt;BR /&gt;import org.apache.hadoop.io.LongWritable;&lt;BR /&gt;import org.apache.hadoop.io.Text;&lt;BR /&gt;import org.apache.hadoop.mapreduce.Mapper;&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;public class KPWordCountMapper extends Mapper&amp;lt;LongWritable, Text, Text, IntWritable&amp;gt;{&lt;BR /&gt;&lt;BR /&gt;int count = 0;&lt;BR /&gt;&lt;BR /&gt;@Override&lt;BR /&gt;public void map(LongWritable inputKey,Text inputVal,Context context) throws IOException,InterruptedException&lt;BR /&gt;{&lt;BR /&gt;TreeSet&amp;lt;String&amp;gt; ts = new TreeSet&amp;lt;&amp;gt;();&lt;BR /&gt;&lt;BR /&gt;String line = inputVal.toString();&lt;BR /&gt;&lt;BR /&gt;String[] splits = line.split("\\W+");&lt;BR /&gt;&lt;BR /&gt;for(String outputKey:splits)&lt;BR /&gt;&lt;BR /&gt;if(outputKey.length() &amp;gt; 0){&lt;BR /&gt;ts.add(outputKey);&lt;BR /&gt;}&lt;BR /&gt;&lt;BR /&gt;Iterator&amp;lt;String&amp;gt; itr= ts.iterator();&lt;BR /&gt;while(itr.hasNext()){&lt;BR /&gt;//System.out.println(itr.next());&lt;BR /&gt;context.write(new Text(itr.next()),new IntWritable(itr.next().length()));&lt;BR /&gt;}&lt;BR /&gt;}&lt;BR /&gt;}&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;14/10/21 03:14:12 INFO input.FileInputFormat: Total input paths to process : 5&lt;BR /&gt;14/10/21 03:14:12 INFO util.NativeCodeLoader: Loaded the native-hadoop library&lt;BR /&gt;14/10/21 03:14:12 WARN snappy.LoadSnappy: Snappy native library not loaded&lt;BR /&gt;14/10/21 03:14:12 INFO mapred.JobClient: Running job: job_201410120206_0080&lt;BR /&gt;14/10/21 03:14:13 INFO mapred.JobClient: map 0% reduce 0%&lt;BR /&gt;14/10/21 03:14:28 INFO 
mapred.JobClient: Task Id : attempt_201410120206_0080_m_000000_0, Status : FAILED&lt;BR /&gt;java.util.NoSuchElementException&lt;BR /&gt;at java.util.TreeMap$PrivateEntryIterator.nextEntry(TreeMap.java:1113)&lt;BR /&gt;at java.util.TreeMap$KeyIterator.next(TreeMap.java:1169)&lt;BR /&gt;at KPWordCountMapper.map(KPWordCountMapper.java:51)&lt;BR /&gt;at KPWordCountMapper.map(KPWordCountMapper.java:1)&lt;BR /&gt;at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)&lt;BR /&gt;at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:764)&lt;BR /&gt;at org.apache.hadoop.mapred.MapTask.run(MapTask.java:370)&lt;BR /&gt;at org.apache.hadoop.mapred.Child$4.run(Child.java:255)&lt;BR /&gt;at java.security.AccessController.doPrivileged(Native Method)&lt;BR /&gt;at javax.security.auth.Subject.doAs(Subject.java:415)&lt;BR /&gt;at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)&lt;BR /&gt;at org.apache.hadoop.mapred.Child.main(Child.java:249)&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Any ideas??&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 20 Oct 2014 21:50:32 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-it-possible-to-use-a-Treeset-collection-object-inside-the/m-p/20564#M3279</guid>
      <dc:creator>Skill_Fresh</dc:creator>
      <dc:date>2014-10-20T21:50:32Z</dc:date>
    </item>
    <item>
      <title>Re: Is it possible to use a Treeset collection object inside the mapper?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-it-possible-to-use-a-Treeset-collection-object-inside-the/m-p/20870#M3280</link>
      <description>&lt;P&gt;Gautam,&lt;/P&gt;&lt;P&gt;You are right. The treeset is treated like any collection object within MR&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The following Mapper code worked for me&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;import java.io.IOException;&lt;BR /&gt;import java.util.Iterator;&lt;BR /&gt;import java.util.TreeSet;&lt;BR /&gt;import org.apache.hadoop.io.LongWritable;&lt;BR /&gt;import org.apache.hadoop.io.NullWritable;&lt;BR /&gt;import org.apache.hadoop.io.Text;&lt;BR /&gt;import org.apache.hadoop.mapreduce.Mapper;&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;public class KPWordCountMapper extends Mapper&amp;lt;LongWritable, Text, Text, NullWritable&amp;gt;{&lt;BR /&gt;int count = 0;&lt;BR /&gt;&lt;BR /&gt;@Override&lt;BR /&gt;public void map(LongWritable inputKey,Text inputVal,Context context) throws IOException,InterruptedException&lt;BR /&gt;{&lt;/P&gt;&lt;P&gt;TreeSet&amp;lt;String&amp;gt; ts = new TreeSet&amp;lt;&amp;gt;();&lt;BR /&gt;&lt;BR /&gt;String line = inputVal.toString();&lt;BR /&gt;&lt;BR /&gt;String[] splits = line.split("\\W+");&lt;BR /&gt;&lt;BR /&gt;for(String outputKey:splits)&lt;BR /&gt;&lt;BR /&gt;if(outputKey.length() &amp;gt; 0){&lt;BR /&gt;ts.add(outputKey);&lt;BR /&gt;}&lt;BR /&gt;&lt;BR /&gt;Iterator&amp;lt;String&amp;gt; itr= ts.iterator();&lt;BR /&gt;while(itr.hasNext()){&lt;BR /&gt;context.write(new Text(itr.next()),NullWritable.get());&lt;BR /&gt;}&lt;BR /&gt;}&lt;BR /&gt;}&lt;/P&gt;</description>
      <pubDate>Sun, 26 Oct 2014 17:17:51 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-it-possible-to-use-a-Treeset-collection-object-inside-the/m-p/20870#M3280</guid>
      <dc:creator>Skill_Fresh</dc:creator>
      <dc:date>2014-10-26T17:17:51Z</dc:date>
    </item>
  </channel>
</rss>

