<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: How to expand a single row with a start and end date into multiple rows, one for each day in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-expand-a-single-row-with-a-start-and-end-date-into/m-p/125768#M51360</link>
    <description>&lt;P&gt;works perfectly now. &lt;/P&gt;</description>
    <pubDate>Fri, 24 Feb 2017 03:11:03 GMT</pubDate>
    <dc:creator>adnanalvee</dc:creator>
    <dc:date>2017-02-24T03:11:03Z</dc:date>
    <item>
      <title>How to expand a single row with a start and end date into multiple rows, one for each day</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-expand-a-single-row-with-a-start-and-end-date-into/m-p/125764#M51356</link>
      <description>&lt;P&gt;I have rows like the ones below.&lt;/P&gt;&lt;PRE&gt;+--------------+-------------------+-------------------+
|            id|    line_start_date|      line_end_date|
+--------------+-------------------+-------------------+
|        ID1   | 8/15/2016 00:00:00| 8/21/2016 23:55:59|
|        ID2   | 1/25/2016 00:00:00| 1/31/2016 23:55:59|
|        ID3   | 2/29/2016 00:00:00| 2/29/2016 23:55:59|
|        ID4   |  2/8/2016 00:00:00| 2/14/2016 23:55:59|
|        ID5   | 7/25/2016 00:00:00| 7/31/2016 23:55:59|
|        ID6   |  8/1/2016 00:00:00|  8/7/2016 23:55:59|
+--------------+-------------------+-------------------+







&lt;/PRE&gt;&lt;P&gt;&lt;STRONG&gt;Note:&lt;/STRONG&gt; the date format is "MM/dd/yyyy HH:mm:ss" in UTC.&lt;/P&gt;&lt;P&gt;I want to generate individual dates between the start date and end date for each of the IDs.&lt;/P&gt;&lt;P&gt;As a result I will have more rows and can easily do a &lt;STRONG&gt;groupBy&lt;/STRONG&gt; to perform the aggregation I want.&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;Example Output Required:&lt;/STRONG&gt;&lt;/P&gt;&lt;PRE&gt;+--------------+-------------------+
|            id|    dates          |   
+--------------+-------------------+
|        ID1   | 8/15/2016 00:00:00|
|        ID1   | 8/16/2016 00:00:00|
|        ID1   | 8/17/2016 00:00:00| 
|        ...   | ......            | 
|        ...   | ......            | 
+--------------+-------------------+&lt;/PRE&gt;&lt;P&gt;How can I do this with the DataFrame API? I have searched for hours and have no clue yet!&lt;/P&gt;</description>
      <pubDate>Thu, 12 Jan 2017 02:10:12 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-expand-a-single-row-with-a-start-and-end-date-into/m-p/125764#M51356</guid>
      <dc:creator>adnanalvee</dc:creator>
      <dc:date>2017-01-12T02:10:12Z</dc:date>
    </item>
    <item>
      <title>Re: How to expand a single row with a start and end date into multiple rows, one for each day</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-expand-a-single-row-with-a-start-and-end-date-into/m-p/125765#M51357</link>
      <description>&lt;P&gt;Something similar using RDDs&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;Steps &lt;/STRONG&gt;&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;Read file as RDD&lt;/LI&gt;&lt;LI&gt;Create new RDD - for each line/entry on the file create a list of tuples (id,date), for each date between d1 and d2&lt;/LI&gt;&lt;LI&gt;Flatten the list to generate the final RDD with each id, date combination per row&lt;/LI&gt;&lt;/UL&gt;&lt;PRE&gt; def main(args: Array[String]): Unit = 
{    
var sc = new SparkContext("local[*]", "app1")
var fileRdd = sc.textFile("inFile");    
var explodedRdd = fileRdd.map{x=&amp;gt;getRddList(x)}.flatMap(y=&amp;gt;y)
explodedRdd.saveAsTextFile("outDir")  
}  
def getDaysBetweenDates(startdate: Date, enddate: Date): ListBuffer[String] =    {      
var dateList = new ListBuffer[String]()      
var calendar = new GregorianCalendar()      
calendar.setTime(startdate)      
while (calendar.getTime().before(enddate)) {        
dateList += calendar.getTime().toString()        
calendar.add(Calendar.DATE, 1)     
}      
dateList += calendar.getTime().toString()
dateList    
} 
 
def getRddList(a :String) : ListBuffer[(String,String)] = {    
var allDates = new ListBuffer[(String,String)]()    
val format = new java.text.SimpleDateFormat("yyyy-MM-dd")    
for (x &amp;lt;- getDaysBetweenDates(format.parse(a.split(",")(1)), format.parse(a.split(",")(2)))){  
allDates += ((a.split(",")(0).toString(),x))    
}
allDates  
}&lt;/PRE&gt;</description>
      <pubDate>Thu, 12 Jan 2017 04:55:00 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-expand-a-single-row-with-a-start-and-end-date-into/m-p/125765#M51357</guid>
      <dc:creator>arunak</dc:creator>
      <dc:date>2017-01-12T04:55:00Z</dc:date>
    </item>
    <item>
      <title>Re: How to expand a single row with a start and end date into multiple rows, one for each day</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-expand-a-single-row-with-a-start-and-end-date-into/m-p/125766#M51358</link>
      <description>&lt;P&gt;++ you could then convert the RDD to a dataframe if required. &lt;/P&gt;</description>
      <pubDate>Thu, 12 Jan 2017 04:57:07 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-expand-a-single-row-with-a-start-and-end-date-into/m-p/125766#M51358</guid>
      <dc:creator>arunak</dc:creator>
      <dc:date>2017-01-12T04:57:07Z</dc:date>
    </item>
    <item>
      <title>Re: How to expand a single row with a start and end date into multiple rows, one for each day</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-expand-a-single-row-with-a-start-and-end-date-into/m-p/125767#M51359</link>
      <description>&lt;P&gt;Thanks! I will be testing it soon and will be accepting your answer if it works out.&lt;/P&gt;</description>
      <pubDate>Sat, 14 Jan 2017 07:43:01 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-expand-a-single-row-with-a-start-and-end-date-into/m-p/125767#M51359</guid>
      <dc:creator>adnanalvee</dc:creator>
      <dc:date>2017-01-14T07:43:01Z</dc:date>
    </item>
    <item>
      <title>Re: How to expand a single row with a start and end date into multiple rows, one for each day</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-expand-a-single-row-with-a-start-and-end-date-into/m-p/125768#M51360</link>
      <description>&lt;P&gt;works perfectly now. &lt;/P&gt;</description>
      <pubDate>Fri, 24 Feb 2017 03:11:03 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/How-to-expand-a-single-row-with-a-start-and-end-date-into/m-p/125768#M51360</guid>
      <dc:creator>adnanalvee</dc:creator>
      <dc:date>2017-02-24T03:11:03Z</dc:date>
    </item>
  </channel>
</rss>

