<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Spark : How to make calls to database using foreachPartition in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Spark-How-to-make-calls-to-database-using-foreachPartition/m-p/123339#M86083</link>
    <description>&lt;P&gt;We have a Spark streaming job writing data to Amazon DynamoDB using &lt;STRONG&gt;foreachRDD&lt;/STRONG&gt;, but it is very slow: our consumption rate is 10,000/sec, and writing 10,000 records takes 35 minutes. This is the code:&lt;/P&gt;&lt;P&gt;From research I learnt that using &lt;STRONG&gt;foreachPartition&lt;/STRONG&gt; and creating a connection per partition will help, but I am not sure how to write the code for it. I would greatly appreciate it if someone could help with this. Any other suggestions to speed up writing are also greatly appreciated.&lt;/P&gt;&lt;PRE&gt;tempRequestsWithState.foreachRDD { rdd =&amp;gt;


    if ((rdd != null) &amp;amp;&amp;amp; (rdd.count() &amp;gt; 0) &amp;amp;&amp;amp; (!rdd.isEmpty()) ) {


      rdd.foreachPartition {


        case (topicsTableName, hashKeyTemp, attributeValueUpdate) =&amp;gt; {
          
          val client = new AmazonDynamoDBClient()
          val request = new UpdateItemRequest(topicsTableName, hashKeyTemp, attributeValueUpdate)
          try client.updateItem(request)




        catch {


            case se: Exception =&amp;gt; println("Error executing updateItem!\nTable ", se)


        }


        }


        case null =&amp;gt;




      }
    }
  }
&lt;/PRE&gt;</description>
    <pubDate>Fri, 24 Feb 2017 12:37:24 GMT</pubDate>
    <dc:creator>chmamidala</dc:creator>
    <dc:date>2017-02-24T12:37:24Z</dc:date>
  </channel>
</rss>

