<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Spark-sklearn integration in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Spark-sklearn-integration/m-p/210596#M65942</link>
    <description>&lt;P&gt;Thanks much for your response.&lt;/P&gt;</description>
    <pubDate>Fri, 04 Aug 2017 03:22:22 GMT</pubDate>
    <dc:creator>Chandra</dc:creator>
    <dc:date>2017-08-04T03:22:22Z</dc:date>
    <item>
      <title>Spark-sklearn integration</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Spark-sklearn-integration/m-p/210594#M65940</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;We have an on-premise Hadoop cluster and are planning to integrate Spark with scikit-learn using the spark-sklearn package. Could you please let me know whether we need to install the sklearn and spark-sklearn packages on all nodes, or only on the node where the spark2-history server has been installed? We will be using YARN for resource allocation.&lt;/P&gt;&lt;P&gt;Thanks,&lt;/P&gt;&lt;P&gt;Chandra&lt;/P&gt;</description>
      <pubDate>Thu, 03 Aug 2017 04:11:15 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Spark-sklearn-integration/m-p/210594#M65940</guid>
      <dc:creator>Chandra</dc:creator>
      <dc:date>2017-08-03T04:11:15Z</dc:date>
    </item>
    <item>
      <title>Re: Spark-sklearn integration</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Spark-sklearn-integration/m-p/210595#M65941</link>
      <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/11008/chandramoulimuthukumaran.html" nodeid="11008"&gt;@chandramouli muthukumaran&lt;/A&gt;&lt;/P&gt;&lt;P&gt;You'll want to install sklearn (pip install -U scikit-learn) and spark-sklearn on all datanodes of the cluster, as well as other relevant python packages such as numpy, scipy, etc. I'd also recommend using YARN as the resource manager, so you are on the right path there. Hope this helps!&lt;/P&gt;</description>
      <pubDate>Thu, 03 Aug 2017 06:22:32 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Spark-sklearn-integration/m-p/210595#M65941</guid>
      <dc:creator>dzaratsian</dc:creator>
      <dc:date>2017-08-03T06:22:32Z</dc:date>
    </item>
    <item>
      <title>Re: Spark-sklearn integration</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Spark-sklearn-integration/m-p/210596#M65942</link>
      <description>&lt;P&gt;Thanks much for your response.&lt;/P&gt;</description>
      <pubDate>Fri, 04 Aug 2017 03:22:22 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Spark-sklearn-integration/m-p/210596#M65942</guid>
      <dc:creator>Chandra</dc:creator>
      <dc:date>2017-08-04T03:22:22Z</dc:date>
    </item>
  </channel>
</rss>

