<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question queries taking forever upgrading to hive-on-tez from MR in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/queries-taking-forever-upgrading-to-hive-on-tez-from-MR/m-p/343660#M233987</link>
    <description>&lt;P&gt;Hive from MR was upgraded to tez with the latest upgrade to cdp 7 and we are seeing significant performance drop. Tried running the same query with static single hour partition just to observe outcome and it took 5hrs to finish whereas it used to complete within 4-5 hrs across the same ORC data set for all 24hrs partition.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;INSERT OVERWRITE TABLE `user_tables`.`dummy_table`&amp;nbsp;PARTITION(date_partition, hour_partition)&lt;BR /&gt;SELECT `(date_partition|hour_partition)?+.+`, to_date(srt.date_time) as date_partition, SUBSTR(srt.date_time, 12, 2) AS hour_partition&lt;BR /&gt;FROM `user_tables`.`source_dummy_table` srt&lt;BR /&gt;WHERE srt.date_partition BETWEEN "2022-04-05" AND date_add("2022-04-05", 4)&lt;BR /&gt;AND upper(srt.prop1) = "XYZ"&lt;BR /&gt;AND to_date(srt.date_time) BETWEEN "2022-04-05" AND "2022-04-05";&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt;DAG shows for above:&amp;nbsp;&lt;BR /&gt;VERTICES MODE&amp;nbsp; &amp;nbsp; &amp;nbsp;STATUS&amp;nbsp; &amp;nbsp; &amp;nbsp;TOTAL&amp;nbsp; &amp;nbsp;COMPLETED&amp;nbsp; RUNNING&amp;nbsp; &amp;nbsp; PENDING&amp;nbsp; &amp;nbsp;FAILED&amp;nbsp; &amp;nbsp; &amp;nbsp;KILLED&lt;BR /&gt;----------------------------------------------------------------------------------------------&lt;BR /&gt;1&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; container&amp;nbsp; &amp;nbsp; KILLED&amp;nbsp; &amp;nbsp; 136366&amp;nbsp; &amp;nbsp; &amp;nbsp;9112&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 0&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 127254&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;3&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 1258&lt;BR /&gt;----------------------------------------------------------------------------------------------&lt;BR /&gt;VERTICES: 00/01 [=&amp;gt;&amp;gt;-------------------------] 6% ELAPSED TIME: 39377.91 s&lt;BR 
/&gt;&lt;BR /&gt;&lt;BR /&gt;We are using dynamic partitioning because this used to work fine on MR. What memory parameters can be tweaked for Tez to make it work? This timeline is unrealistic, and it's a relatively powerful cluster with 34 nodes with enough cores/memory (3TB).&amp;nbsp;&lt;BR /&gt;The settings below have already been added as per suggestion:&lt;BR /&gt;hive.exec.compress.intermediate=true&lt;BR /&gt;hive.intermediate.compression.codec=org.apache.hadoop.io.compress.SnappyCodec&lt;/P&gt;&lt;P&gt;hive.intermediate.compression.type=BLOCK&lt;/P&gt;&lt;P&gt;hive.exec.parallel=true&lt;/P&gt;&lt;P&gt;hive.enforce.sorting=true&lt;/P&gt;&lt;P&gt;hive.exec.orc.split.strategy=BI&lt;/P&gt;&lt;P&gt;tez.grouping.max-size=67108864&lt;/P&gt;&lt;P&gt;tez.grouping.min-size=67108864&lt;/P&gt;&lt;P&gt;hive.merge.tezfiles=true&lt;/P&gt;&lt;P&gt;hive.merge.smallfiles.avgsize=67108864&lt;/P&gt;&lt;P&gt;hive.merge.size.per.task=134217728&lt;/P&gt;&lt;P&gt;tez.am.resource.memory.mb=16384&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;hive.tez.container.size=16384&lt;BR /&gt;&lt;BR /&gt;Any help or suggestion is appreciated.&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;</description>
    <pubDate>Thu, 12 May 2022 13:53:58 GMT</pubDate>
    <dc:creator>Djentlguy</dc:creator>
    <dc:date>2022-05-12T13:53:58Z</dc:date>
  </channel>
</rss>

