<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: how can i decide i use spark or Mpareduce ? in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/how-can-i-decide-i-use-spark-or-Mpareduce/m-p/171870#M134163</link>
    <description>&lt;P&gt;When I execute SELECT * FROM &amp;lt;table&amp;gt; ORDER BY &amp;lt;column name&amp;gt; using Spark as the computing engine, where is the ORDER BY performed? Since the data is distributed across the cluster, does Spark first combine all the selected data in one place, or does it perform the ORDER BY on multiple nodes, and in which memory?&lt;/P&gt;</description>
    <pubDate>Fri, 24 Mar 2017 14:12:11 GMT</pubDate>
    <dc:creator>heta_desai</dc:creator>
    <dc:date>2017-03-24T14:12:11Z</dc:date>
    <item>
      <title>how can i decide i use spark or Mpareduce ?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/how-can-i-decide-i-use-spark-or-Mpareduce/m-p/171866#M134159</link>
      <description>&lt;P&gt;I want to know which computing engine is better in which situation.&lt;/P&gt;&lt;P&gt;Thanks.&lt;/P&gt;</description>
      <pubDate>Thu, 23 Mar 2017 12:30:39 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/how-can-i-decide-i-use-spark-or-Mpareduce/m-p/171866#M134159</guid>
      <dc:creator>heta_desai</dc:creator>
      <dc:date>2017-03-23T12:30:39Z</dc:date>
    </item>
    <item>
      <title>Re: how can i decide i use spark or Mpareduce ?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/how-can-i-decide-i-use-spark-or-Mpareduce/m-p/171867#M134160</link>
      <description>&lt;P&gt;This question is too broad in this form. &lt;/P&gt;&lt;P&gt;You need to understand this: if you want to get advice on which solution (computing engine) to choose, you should first give a description of what you are trying to accomplish, what kind of problem you are trying to solve, and what the nature of your workload is.&lt;/P&gt;</description>
      <pubDate>Thu, 23 Mar 2017 22:54:28 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/how-can-i-decide-i-use-spark-or-Mpareduce/m-p/171867#M134160</guid>
      <dc:creator>bpgergo</dc:creator>
      <dc:date>2017-03-23T22:54:28Z</dc:date>
    </item>
    <item>
      <title>Re: how can i decide i use spark or Mpareduce ?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/how-can-i-decide-i-use-spark-or-Mpareduce/m-p/171868#M134161</link>
      <description>&lt;P&gt;Yes, it highly depends on your specific use case. But if you want to know the general pros and cons of each of these frameworks, then here is a good Quora thread:&lt;/P&gt;&lt;P&gt;&lt;A href="https://www.quora.com/What-is-the-difference-between-Apache-Spark-and-Apache-Hadoop-Map-Reduce" target="_blank"&gt;https://www.quora.com/What-is-the-difference-between-Apache-Spark-and-Apache-Hadoop-Map-Reduce&lt;/A&gt;&lt;/P&gt;&lt;P&gt;And also, of course, the Stack Overflow thread:&lt;/P&gt;&lt;P&gt;&lt;A href="http://stackoverflow.com/questions/22167684/mapreduce-or-spark" target="_blank"&gt;http://stackoverflow.com/questions/22167684/mapreduce-or-spark&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 24 Mar 2017 01:01:34 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/how-can-i-decide-i-use-spark-or-Mpareduce/m-p/171868#M134161</guid>
      <dc:creator>kbadani</dc:creator>
      <dc:date>2017-03-24T01:01:34Z</dc:date>
    </item>
    <item>
      <title>Re: how can i decide i use spark or Mpareduce ?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/how-can-i-decide-i-use-spark-or-Mpareduce/m-p/171869#M134162</link>
      <description>&lt;P&gt;I am new to Hadoop. I want to know how MapReduce and Spark work internally, and what the difference between them is that makes Spark execution faster than MR.&lt;/P&gt;</description>
      <pubDate>Fri, 24 Mar 2017 14:03:52 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/how-can-i-decide-i-use-spark-or-Mpareduce/m-p/171869#M134162</guid>
      <dc:creator>heta_desai</dc:creator>
      <dc:date>2017-03-24T14:03:52Z</dc:date>
    </item>
    <item>
      <title>Re: how can i decide i use spark or Mpareduce ?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/how-can-i-decide-i-use-spark-or-Mpareduce/m-p/171870#M134163</link>
      <description>&lt;P&gt;When I execute SELECT * FROM &amp;lt;table&amp;gt; ORDER BY &amp;lt;column name&amp;gt; using Spark as the computing engine, where is the ORDER BY performed? Since the data is distributed across the cluster, does Spark first combine all the selected data in one place, or does it perform the ORDER BY on multiple nodes, and in which memory?&lt;/P&gt;</description>
      <pubDate>Fri, 24 Mar 2017 14:12:11 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/how-can-i-decide-i-use-spark-or-Mpareduce/m-p/171870#M134163</guid>
      <dc:creator>heta_desai</dc:creator>
      <dc:date>2017-03-24T14:12:11Z</dc:date>
    </item>
    <item>
      <title>Re: how can i decide i use spark or Mpareduce ?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/how-can-i-decide-i-use-spark-or-Mpareduce/m-p/171871#M134164</link>
      <description>&lt;P&gt;1) MR is best suited for batch processing and for loading data, but it is slower compared to Spark.&lt;/P&gt;&lt;P&gt;2) Spark is for in-memory processing; it is faster because it processes data in memory only.&lt;/P&gt;</description>
      <pubDate>Fri, 24 Mar 2017 19:59:22 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/how-can-i-decide-i-use-spark-or-Mpareduce/m-p/171871#M134164</guid>
      <dc:creator>shivkumar82015</dc:creator>
      <dc:date>2017-03-24T19:59:22Z</dc:date>
    </item>
    <item>
      <title>Re: how can i decide i use spark or Mpareduce ?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/how-can-i-decide-i-use-spark-or-Mpareduce/m-p/171872#M134165</link>
      <description>&lt;P&gt;1) MR is best suited for batch processing and for loading data, but it is slower compared to Spark.&lt;/P&gt;&lt;P&gt;2) Spark is for in-memory processing; it is faster because it processes data in memory only.&lt;/P&gt;</description>
      <pubDate>Fri, 24 Mar 2017 19:59:50 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/how-can-i-decide-i-use-spark-or-Mpareduce/m-p/171872#M134165</guid>
      <dc:creator>shivkumar82015</dc:creator>
      <dc:date>2017-03-24T19:59:50Z</dc:date>
    </item>
    <item>
      <title>Re: how can i decide i use spark or Mpareduce ?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/how-can-i-decide-i-use-spark-or-Mpareduce/m-p/171873#M134166</link>
      <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/16744/hetadesai.html" nodeid="16744"&gt;@heta desai&lt;/A&gt; This slide deck explains the Spark internals in a very simple way:&lt;/P&gt;&lt;P&gt;&lt;A href="https://spark-summit.org/2014/wp-content/uploads/2014/07/A-Deeper-Understanding-of-Spark-Internals-Aaron-Davidson.pdf"&gt;https://spark-summit.org/2014/wp-content/uploads/2014/07/A-Deeper-Understanding-of-Spark-Internals-Aaron-Davidson.pdf&lt;/A&gt;&lt;/P&gt;&lt;P&gt;Based on this, what I think is that when you do an ORDER BY, the data in each partition is ordered first. Then, to achieve a universal order, the ordering among partitions is carried out. Spark won't accumulate all data in one place, because that's not possible if the data is huge. Spark would try to perform all operations in memory.&lt;/P&gt;&lt;P&gt;Corresponding Stack Overflow answer:&lt;/P&gt;&lt;P&gt;&lt;A href="http://stackoverflow.com/questions/32887595/how-does-spark-achieve-sort-order" target="_blank"&gt;http://stackoverflow.com/questions/32887595/how-does-spark-achieve-sort-order&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 31 Mar 2017 03:41:26 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/how-can-i-decide-i-use-spark-or-Mpareduce/m-p/171873#M134166</guid>
      <dc:creator>kbadani</dc:creator>
      <dc:date>2017-03-31T03:41:26Z</dc:date>
    </item>
  </channel>
</rss>

