<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <!-- Channel-level metadata for this thread feed.
         The link, description, pubDate, dc:creator and dc:date below carry the
         same values as the item for message 162394 (#M21217) later in this
         channel. The description is HTML carried as entity-escaped text, so a
         consumer must XML-decode it once to obtain the markup. -->
    <title>question Re: nutch web crawling using hbase  in hortonworks in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/nutch-web-crawling-using-hbase-in-hortonworks/m-p/162394#M21217</link>
    <description>&lt;P&gt;i got this error message&lt;/P&gt;&lt;P&gt;[root@sandbox ~]# bin/nutch fetch 1456727546-2019589981&lt;/P&gt;&lt;P&gt;Exception in thread "main" java.lang.RuntimeException: job failed: name=apache-nutch-2.3.1.jar, jobid=job_local522155708_0001 at org.apache.nutch.util.NutchJob.waitForCompletion(NutchJob.java:120) at org.apache.nutch.fetcher.FetcherJob.run(FetcherJob.java:205) at org.apache.nutch.fetcher.FetcherJob.fetch(FetcherJob.java:251) at org.apache.nutch.fetcher.FetcherJob.run(FetcherJob.java:314) at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70) at org.apache.nutch.fetcher.FetcherJob.main(FetcherJob.java:322)&lt;/P&gt;</description>
    <pubDate>Mon, 29 Feb 2016 14:39:59 GMT</pubDate>
    <dc:creator>hadoopsmi</dc:creator>
    <dc:date>2016-02-29T14:39:59Z</dc:date>
    <!-- Message 162391 (#M21214), by hadoopsmi: the opening post of the thread
         (the only title without a "Re:" prefix). Asks how to crawl web URLs
         with Nutch and store the data in HBase. The guid repeats the link URL;
         pubDate and dc:date express the same instant in two formats. -->
    <item>
      <title>nutch web crawling using hbase  in hortonworks</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/nutch-web-crawling-using-hbase-in-hortonworks/m-p/162391#M21214</link>
      <description>&lt;P&gt;i want crawl the web urls information  using nutch and store the data in hbase db. any one can suggest for how to do this with some example.  bcoz i am new one for nutch. &lt;/P&gt;</description>
      <pubDate>Mon, 29 Feb 2016 12:36:16 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/nutch-web-crawling-using-hbase-in-hortonworks/m-p/162391#M21214</guid>
      <dc:creator>hadoopsmi</dc:creator>
      <dc:date>2016-02-29T12:36:16Z</dc:date>
    </item>
    <!-- Message 162392 (#M21215), by nsabharwal: reply that mentions the
         original poster and links to the HDP 2.3.0 search documentation
         (labelled "Off topic" by the author) and to the Nutch Hadoop
         single-node tutorial.
         NOTE(review): the '>' of the arrow after "Nutch" is escaped twice in
         the description. That is the expected result of XML-escaping HTML that
         already contains an HTML entity, so it should not be "fixed". -->
    <item>
      <title>Re: nutch web crawling using hbase  in hortonworks</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/nutch-web-crawling-using-hbase-in-hortonworks/m-p/162392#M21215</link>
      <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/838/hadoopsmi.html" nodeid="838"&gt;@sivasaravanakumar k&lt;/A&gt;  Off topic :  &lt;A href="http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.3.0/bk_search/index.html" target="_blank"&gt;http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.3.0/bk_search/index.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;Nutch --&amp;gt; &lt;A href="http://wiki.apache.org/nutch/NutchHadoopSingleNodeTutorial" target="_blank"&gt;http://wiki.apache.org/nutch/NutchHadoopSingleNodeTutorial&lt;/A&gt;&lt;/P&gt;&lt;P&gt;You can use the same for multinode cluster &lt;/P&gt;</description>
      <pubDate>Mon, 29 Feb 2016 12:57:47 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/nutch-web-crawling-using-hbase-in-hortonworks/m-p/162392#M21215</guid>
      <dc:creator>nsabharwal</dc:creator>
      <dc:date>2016-02-29T12:57:47Z</dc:date>
    </item>
    <!-- Message 162393 (#M21216), by nsabharwal: short reply that mentions the
         original poster and states that Nutch is not part of the HDP stack. -->
    <item>
      <title>Re: nutch web crawling using hbase  in hortonworks</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/nutch-web-crawling-using-hbase-in-hortonworks/m-p/162393#M21216</link>
      <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/838/hadoopsmi.html" nodeid="838"&gt;@sivasaravanakumar k&lt;/A&gt;  FYI: Nutch is not part of HDP stack &lt;/P&gt;</description>
      <pubDate>Mon, 29 Feb 2016 13:00:07 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/nutch-web-crawling-using-hbase-in-hortonworks/m-p/162393#M21216</guid>
      <dc:creator>nsabharwal</dc:creator>
      <dc:date>2016-02-29T13:00:07Z</dc:date>
    </item>
    <!-- Message 162394 (#M21217), by hadoopsmi: reports the RuntimeException
         ("job failed") stack trace printed by "bin/nutch fetch".
         This item's link, description, pubDate, dc:creator and dc:date are the
         same values that appear in the channel-level metadata at the top of
         this channel. -->
    <item>
      <title>Re: nutch web crawling using hbase  in hortonworks</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/nutch-web-crawling-using-hbase-in-hortonworks/m-p/162394#M21217</link>
      <description>&lt;P&gt;i got this error message&lt;/P&gt;&lt;P&gt;[root@sandbox ~]# bin/nutch fetch 1456727546-2019589981&lt;/P&gt;&lt;P&gt;Exception in thread "main" java.lang.RuntimeException: job failed: name=apache-nutch-2.3.1.jar, jobid=job_local522155708_0001 at org.apache.nutch.util.NutchJob.waitForCompletion(NutchJob.java:120) at org.apache.nutch.fetcher.FetcherJob.run(FetcherJob.java:205) at org.apache.nutch.fetcher.FetcherJob.fetch(FetcherJob.java:251) at org.apache.nutch.fetcher.FetcherJob.run(FetcherJob.java:314) at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70) at org.apache.nutch.fetcher.FetcherJob.main(FetcherJob.java:322)&lt;/P&gt;</description>
      <pubDate>Mon, 29 Feb 2016 14:39:59 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/nutch-web-crawling-using-hbase-in-hortonworks/m-p/162394#M21217</guid>
      <dc:creator>hadoopsmi</dc:creator>
      <dc:date>2016-02-29T14:39:59Z</dc:date>
    </item>
    <!-- Message 162395 (#M21218), by nsabharwal: the last item in this channel.
         Mentions the original poster and links to the Nutch home page, a Stack
         Overflow question about alternative crawlers, and a Nutch Hadoop
         cluster how-to. -->
    <item>
      <title>Re: nutch web crawling using hbase  in hortonworks</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/nutch-web-crawling-using-hbase-in-hortonworks/m-p/162395#M21218</link>
      <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/838/hadoopsmi.html" nodeid="838"&gt;@sivasaravanakumar k&lt;/A&gt;  &lt;A href="http://nutch.apache.org/"&gt;http://nutch.apache.org/&lt;/A&gt;&lt;/P&gt;&lt;P&gt;Recommender: Apache Hadoop 2.5.2&lt;/P&gt;&lt;P&gt;I highly recommend to take a look on this &lt;A href="http://stackoverflow.com/questions/4269632/an-alternative-web-crawler-to-nutch"&gt;http://stackoverflow.com/questions/4269632/an-alternative-web-crawler-to-nutch&lt;/A&gt;&lt;/P&gt;&lt;P&gt;Nutch tutorial &lt;A href="http://cs.boisestate.edu/~amit/research/nutch/Nutch-Hadoop-Cluster-Howto.html" target="_blank"&gt;http://cs.boisestate.edu/~amit/research/nutch/Nutch-Hadoop-Cluster-Howto.html&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 29 Feb 2016 15:49:29 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/nutch-web-crawling-using-hbase-in-hortonworks/m-p/162395#M21218</guid>
      <dc:creator>nsabharwal</dc:creator>
      <dc:date>2016-02-29T15:49:29Z</dc:date>
    </item>
  </channel>
</rss>

