<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: merge csv files based on a column timestamp to get one file in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/merge-csv-files-based-on-a-column-timestamp-to-get-one-file/m-p/136343#M98992</link>
    <description>&lt;P&gt;Solution 1: Reduce-side join&lt;/P&gt;&lt;P&gt;Create a separate mapper for each of the 4 CSV files. Each mapper emits the timestamp as the key and, as the value, the remaining fields plus a tag field identifying which file the record came from.&lt;/P&gt;&lt;P&gt;Handle them on the reduce side, where all records sharing a timestamp arrive together and can be merged into one output row.&lt;/P&gt;&lt;P&gt;Solution 2: Map-side join (if 3 of the files are small)&lt;/P&gt;&lt;P&gt;Add the 3 small CSV files to the distributed cache and merge them with the large CSV file in the mapper.&lt;/P&gt;</description>
    <pubDate>Fri, 03 Mar 2017 22:51:18 GMT</pubDate>
    <dc:creator>balaram38489</dc:creator>
    <dc:date>2017-03-03T22:51:18Z</dc:date>
  </channel>
</rss>

