<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: How to connect MongoDB with Hadoop and Spark? in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-connect-MongoDB-with-Hadoop-and-Spark/m-p/158976#M49204</link>
    <description>&lt;P&gt;&lt;A href="https://community.hortonworks.com/users/14467/janebec.html"&gt;@Jane Becker&lt;/A&gt;&lt;/P&gt;&lt;P&gt;The mongo-hadoop project connects Hadoop AND SPARK with MongoDB. You can download it from the releases page (https://github.com/mongodb/mongo-hadoop/releases) or build it yourself from &lt;A href="https://github.com/mongodb/mongo-hadoop" target="_blank"&gt;https://github.com/mongodb/mongo-hadoop&lt;/A&gt;. If you decide to build it yourself, you can do so using gradlew and the following steps, then copy the jar into &lt;CODE&gt;lib/&lt;/CODE&gt;:&lt;/P&gt;&lt;P&gt;&lt;CODE&gt;&lt;/CODE&gt;&lt;/P&gt;&lt;PRE&gt;wget -P /tmp/ &lt;A href="https://github.com/mongodb/mongo-hadoop/archive/r1.5.1.tar.gz" target="_blank"&gt;https://github.com/mongodb/mongo-hadoop/archive/r1.5.1.tar.gz&lt;/A&gt;
mkdir mongo-hadoop
tar -xvzf /tmp/r1.5.1.tar.gz -C mongo-hadoop --strip-components=1

# Now build the mongo-hadoop-spark jars
cd mongo-hadoop
./gradlew jar
cd ..
cp mongo-hadoop/spark/build/libs/mongo-hadoop-spark-*.jar lib/&lt;/PRE&gt;</description>
    <pubDate>Sat, 17 Dec 2016 08:05:01 GMT</pubDate>
    <dc:creator>cstanca</dc:creator>
    <dc:date>2016-12-17T08:05:01Z</dc:date>
    <item>
      <title>How to connect MongoDB with Hadoop and Spark?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-connect-MongoDB-with-Hadoop-and-Spark/m-p/158975#M49203</link>
      <description>&lt;P&gt;I did a bit of research and learned about the Mongo-Hadoop project, but I am not clear whether the project is also helpful for connecting to Spark.&lt;/P&gt;</description>
      <pubDate>Sat, 17 Dec 2016 07:56:01 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-connect-MongoDB-with-Hadoop-and-Spark/m-p/158975#M49203</guid>
      <dc:creator>janebec</dc:creator>
      <dc:date>2016-12-17T07:56:01Z</dc:date>
    </item>
    <item>
      <title>Re: How to connect MongoDB with Hadoop and Spark?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-connect-MongoDB-with-Hadoop-and-Spark/m-p/158976#M49204</link>
      <description>&lt;P&gt;&lt;A href="https://community.hortonworks.com/users/14467/janebec.html"&gt;@Jane Becker&lt;/A&gt;&lt;/P&gt;&lt;P&gt;The mongo-hadoop project connects Hadoop AND SPARK with MongoDB. You can download it from the releases page (https://github.com/mongodb/mongo-hadoop/releases) or build it yourself from &lt;A href="https://github.com/mongodb/mongo-hadoop" target="_blank"&gt;https://github.com/mongodb/mongo-hadoop&lt;/A&gt;. If you decide to build it yourself, you can do so using gradlew and the following steps, then copy the jar into &lt;CODE&gt;lib/&lt;/CODE&gt;:&lt;/P&gt;&lt;P&gt;&lt;CODE&gt;&lt;/CODE&gt;&lt;/P&gt;&lt;PRE&gt;wget -P /tmp/ &lt;A href="https://github.com/mongodb/mongo-hadoop/archive/r1.5.1.tar.gz" target="_blank"&gt;https://github.com/mongodb/mongo-hadoop/archive/r1.5.1.tar.gz&lt;/A&gt;
mkdir mongo-hadoop
tar -xvzf /tmp/r1.5.1.tar.gz -C mongo-hadoop --strip-components=1

# Now build the mongo-hadoop-spark jars
cd mongo-hadoop
./gradlew jar
cd ..
cp mongo-hadoop/spark/build/libs/mongo-hadoop-spark-*.jar lib/&lt;/PRE&gt;</description>
      <pubDate>Sat, 17 Dec 2016 08:05:01 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-connect-MongoDB-with-Hadoop-and-Spark/m-p/158976#M49204</guid>
      <dc:creator>cstanca</dc:creator>
      <dc:date>2016-12-17T08:05:01Z</dc:date>
    </item>
    <item>
      <title>Re: How to connect MongoDB with Hadoop and Spark?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-connect-MongoDB-with-Hadoop-and-Spark/m-p/158977#M49205</link>
      <description>&lt;P&gt;You did not specify the use case, but be aware of some limitations on bson files: &lt;A href="https://github.com/mongodb/mongo-hadoop/wiki/Using-.bson-Files"&gt;https://github.com/mongodb/mongo-hadoop/wiki/Using-.bson-Files&lt;/A&gt;&lt;/P&gt;&lt;P&gt;You may also want to connect PySpark to MongoDB. A good reference: &lt;A href="https://www.mongodb.com/blog/post/using-mongodb-hadoop-spark-part-3-spark-example-key-takeaways"&gt;https://www.mongodb.com/blog/post/using-mongodb-hadoop-spark-part-3-spark-example-key-takeaways&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 17 Dec 2016 08:07:29 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-connect-MongoDB-with-Hadoop-and-Spark/m-p/158977#M49205</guid>
      <dc:creator>cstanca</dc:creator>
      <dc:date>2016-12-17T08:07:29Z</dc:date>
    </item>
    <item>
      <title>Re: How to connect MongoDB with Hadoop and Spark?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-connect-MongoDB-with-Hadoop-and-Spark/m-p/158978#M49206</link>
      <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/3486/cstanca.html" nodeid="3486"&gt;@Constantin Stanca&lt;/A&gt; &lt;/P&gt;&lt;P&gt;Thank you. I'll test it and let you know.&lt;/P&gt;</description>
      <pubDate>Sat, 17 Dec 2016 08:46:12 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-connect-MongoDB-with-Hadoop-and-Spark/m-p/158978#M49206</guid>
      <dc:creator>janebec</dc:creator>
      <dc:date>2016-12-17T08:46:12Z</dc:date>
    </item>
    <item>
      <title>Re: How to connect MongoDB with Hadoop and Spark?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-connect-MongoDB-with-Hadoop-and-Spark/m-p/158979#M49207</link>
      <description>&lt;P&gt;Hi &lt;A rel="user" href="https://community.cloudera.com/users/14467/janebec.html" nodeid="14467"&gt;@Jane Becker&lt;/A&gt;,&lt;/P&gt;&lt;P&gt;Apart from the above answer, on the Spark side, I believe you can use JDBC to extract the data into a DataFrame.&lt;/P&gt;&lt;P&gt; Spark supports JDBC drivers for loading and saving data; the documentation can be found &lt;A href="http://spark.apache.org/docs/latest/sql-programming-guide.html#jdbc-to-other-databases"&gt;here&lt;/A&gt;.&lt;/P&gt;&lt;P&gt;&lt;A href="http://spark.apache.org/docs/latest/sql-programming-guide.html#jdbc-to-other-databases"&gt;&lt;/A&gt;PS: I have not tested this on MongoDB, but I hope it works, provided the MongoDB JDBC driver conforms to the generic JDBC driver standard.&lt;/P&gt;</description>
      <pubDate>Tue, 16 May 2017 13:07:19 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-connect-MongoDB-with-Hadoop-and-Spark/m-p/158979#M49207</guid>
      <dc:creator>bkosaraju</dc:creator>
      <dc:date>2017-05-16T13:07:19Z</dc:date>
    </item>
  </channel>
</rss>

