<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Apache PIG - Ranking with group in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-PIG-Ranking-with-group/m-p/170916#M41905</link>
    <description>&lt;P&gt;Hi experts,&lt;/P&gt;&lt;P&gt;I want to rank my dataset, but I also need to group my data either before or after ranking. My dataset is:&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;EMPLOYEE&lt;/TD&gt;&lt;TD&gt;STOCK&lt;/TD&gt;&lt;TD&gt;FURNISHER&lt;/TD&gt;&lt;TD&gt;DATE&lt;/TD&gt;&lt;TD&gt;VALUE&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;A&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;AA&lt;/TD&gt;&lt;TD&gt;27-01-2016&lt;/TD&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;A&lt;/TD&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;AB&lt;/TD&gt;&lt;TD&gt;28-01-2016&lt;/TD&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;B&lt;/TD&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;AA&lt;/TD&gt;&lt;TD&gt;27-01-2016&lt;/TD&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;C&lt;/TD&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;TD&gt;AC&lt;/TD&gt;&lt;TD&gt;27-01-2016&lt;/TD&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;C&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;AC&lt;/TD&gt;&lt;TD&gt;27-01-2016&lt;/TD&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;Now I want to rank my data by Employee and Date and group them to obtain the sum of Value. I know that I can do this without ranking, but generating the rank by Employee and Date is a requirement. 
Basically I want to extract the following output:&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;ID&lt;/TD&gt;&lt;TD&gt;EMPLOYEE&lt;/TD&gt;&lt;TD&gt;STOCK&lt;/TD&gt;&lt;TD&gt;FURNISHER&lt;/TD&gt;&lt;TD&gt;DATE&lt;/TD&gt;&lt;TD&gt;VALUE&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;A&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;AA&lt;/TD&gt;&lt;TD&gt;27-01-2016&lt;/TD&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;A&lt;/TD&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;AB&lt;/TD&gt;&lt;TD&gt;28-01-2016&lt;/TD&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;TD&gt;B&lt;/TD&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;AA&lt;/TD&gt;&lt;TD&gt;27-01-2016&lt;/TD&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;C&lt;/TD&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;TD&gt;AC&lt;/TD&gt;&lt;TD&gt;27-01-2016&lt;/TD&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;C&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;AC&lt;/TD&gt;&lt;TD&gt;27-01-2016&lt;/TD&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;To obtain this using Apache PIG I'm using this script:&lt;/P&gt;&lt;PRE&gt;INPUT = LOAD 'FILE_PATH' USING PigStorage(';') as 
  (Employee:Chararray, STOCK:Int, FURNICHER:Chararray, Date:Chararray, Value:Double);
RANKING = rank DATA BY Employee,DATE;
GRP = GROUP RANKING BY FURNISHER;
DATA = FOREACH GRP_by_DATA GENERATE FLATTEN(RANKING);
STORE DATA INTO 'DESTINATION_PATH' USING PigStorage(','); &lt;/PRE&gt;&lt;P&gt;But I'm not getting the desired output &lt;span class="lia-unicode-emoji" title=":disappointed_face:"&gt;😞&lt;/span&gt; &lt;/P&gt;&lt;P&gt;Does anyone know how I can do this?&lt;/P&gt;&lt;P&gt;Many thanks!&lt;/P&gt;</description>
    <pubDate>Tue, 27 Sep 2016 16:33:06 GMT</pubDate>
    <dc:creator>m2014227</dc:creator>
    <dc:date>2016-09-27T16:33:06Z</dc:date>
    <item>
      <title>Apache PIG - Ranking with group</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-PIG-Ranking-with-group/m-p/170916#M41905</link>
      <description>&lt;P&gt;Hi experts,&lt;/P&gt;&lt;P&gt;I want to rank my dataset, but I also need to group my data either before or after ranking. My dataset is:&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;EMPLOYEE&lt;/TD&gt;&lt;TD&gt;STOCK&lt;/TD&gt;&lt;TD&gt;FURNISHER&lt;/TD&gt;&lt;TD&gt;DATE&lt;/TD&gt;&lt;TD&gt;VALUE&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;A&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;AA&lt;/TD&gt;&lt;TD&gt;27-01-2016&lt;/TD&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;A&lt;/TD&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;AB&lt;/TD&gt;&lt;TD&gt;28-01-2016&lt;/TD&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;B&lt;/TD&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;AA&lt;/TD&gt;&lt;TD&gt;27-01-2016&lt;/TD&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;C&lt;/TD&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;TD&gt;AC&lt;/TD&gt;&lt;TD&gt;27-01-2016&lt;/TD&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;C&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;AC&lt;/TD&gt;&lt;TD&gt;27-01-2016&lt;/TD&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;Now I want to rank my data by Employee and Date and group them to obtain the sum of Value. I know that I can do this without ranking, but generating the rank by Employee and Date is a requirement. 
Basically I want to extract the following output:&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;ID&lt;/TD&gt;&lt;TD&gt;EMPLOYEE&lt;/TD&gt;&lt;TD&gt;STOCK&lt;/TD&gt;&lt;TD&gt;FURNISHER&lt;/TD&gt;&lt;TD&gt;DATE&lt;/TD&gt;&lt;TD&gt;VALUE&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;A&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;AA&lt;/TD&gt;&lt;TD&gt;27-01-2016&lt;/TD&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;A&lt;/TD&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;AB&lt;/TD&gt;&lt;TD&gt;28-01-2016&lt;/TD&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;TD&gt;B&lt;/TD&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;AA&lt;/TD&gt;&lt;TD&gt;27-01-2016&lt;/TD&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;C&lt;/TD&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;TD&gt;AC&lt;/TD&gt;&lt;TD&gt;27-01-2016&lt;/TD&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;C&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;AC&lt;/TD&gt;&lt;TD&gt;27-01-2016&lt;/TD&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;To obtain this using Apache PIG I'm using this script:&lt;/P&gt;&lt;PRE&gt;INPUT = LOAD 'FILE_PATH' USING PigStorage(';') as 
  (Employee:Chararray, STOCK:Int, FURNICHER:Chararray, Date:Chararray, Value:Double);
RANKING = rank DATA BY Employee,DATE;
GRP = GROUP RANKING BY FURNISHER;
DATA = FOREACH GRP_by_DATA GENERATE FLATTEN(RANKING);
STORE DATA INTO 'DESTINATION_PATH' USING PigStorage(','); &lt;/PRE&gt;&lt;P&gt;But I'm not getting the desired output &lt;span class="lia-unicode-emoji" title=":disappointed_face:"&gt;😞&lt;/span&gt; &lt;/P&gt;&lt;P&gt;Does anyone know how I can do this?&lt;/P&gt;&lt;P&gt;Many thanks!&lt;/P&gt;</description>
      <pubDate>Tue, 27 Sep 2016 16:33:06 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-PIG-Ranking-with-group/m-p/170916#M41905</guid>
      <dc:creator>m2014227</dc:creator>
      <dc:date>2016-09-27T16:33:06Z</dc:date>
    </item>
    <item>
      <title>Re: Apache PIG - Ranking with group</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-PIG-Ranking-with-group/m-p/170917#M41906</link>
      <description>&lt;P&gt;This produces the results you want:&lt;/P&gt;&lt;PRE&gt;RAW = LOAD 'filepath' USING PigStorage(';') as 
  (Employee:Chararray, Stock:Int, Furnisher:Chararray, Date:Chararray, Value:Double);
RANKING = rank RAW BY Employee, Date DENSE;
GRP = GROUP RANKING BY $0;
SUMMED = foreach GRP {
     summed = SUM(RANKING.Value);
     generate $0, summed as Ranksum;
}
JOINED = join RANKING by $0, SUMMED by $0;
FINAL= foreach JOINED generate $0, Employee, Stock, Furnisher, Date, Ranksum;
STORE FINAL INTO 'destinationpath' USING PigStorage(','); &lt;/PRE&gt;&lt;P&gt;Let me know if this is what you are looking for by accepting the answer.  If I did not get the requirements correct, please clarify.&lt;/P&gt;</description>
      <pubDate>Tue, 27 Sep 2016 20:17:30 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Apache-PIG-Ranking-with-group/m-p/170917#M41906</guid>
      <dc:creator>gkeys</dc:creator>
      <dc:date>2016-09-27T20:17:30Z</dc:date>
    </item>
  </channel>
</rss>

