Community Articles

Find and share helpful community-sourced technical articles.
avatar
New Contributor

Short Description: An example Play Framework application that runs integrated Apache Spark applications and publishes their output directly to the web.

86559-screen-shot-2018-08-16-at-155557.png

Motivation: There are few articles showing how Play and Apache Spark can be combined to publish output data straight to the web.

Used components:

Play is a high-productivity Java and Scala web application framework that integrates the components and APIs you need for modern web application development. Play is based on a lightweight, stateless, web-friendly architecture and features predictable and minimal resource consumption (CPU, memory, threads) for highly-scalable applications thanks to its reactive model, based on Akka Streams.

Link to PlayFramework (https://www.playframework.com)

Apache Spark is a fast and general processing engine compatible with Hadoop data. It can run in Hadoop clusters through YARN or Spark's standalone mode, and it can process data in HDFS, HBase, Cassandra, Hive, and any Hadoop Input Format. It is designed to perform both batch processing (similar to MapReduce) and new workloads like streaming, interactive queries, graph processing and machine learning.

Link to Apache-Spark (https://spark.apache.org)

Scala combines object-oriented and functional programming in one concise, high-level language. Scala's static types help avoid bugs in complex applications, and its JVM and JavaScript runtimes let you build high-performance systems with easy access to huge ecosystems of libraries.

Link to Scala (https://www.scala-lang.org)

Sbt is a build tool for Scala and Java.

Link to Sbt (https://www.scala-sbt.org)

Versions:

  • HDP 3.0.1
  • Apache Spark 2.3.1
  • Play Framework 2.6.X
  • Scala 2.11.8
  • Sbt: 1.12
  • Architecture:

    86560-screen-shot-2018-08-17-at-101037.png

    Code:

    conf/routes

    # Routes
    # Each entry must be on its own line: HTTP method, URL path, controller action.

    # Home page
    GET     /                controllers.Application.index

    # Links to controller
    GET     /run             controllers.Application.run
    GET     /result_count    controllers.Application.result_count
    GET     /result_full     controllers.Application.result_full

    # Map static resources from the /public folder to the /assets URL path
    GET     /assets/*file    controllers.Assets.at(path="/public", file)
    

    controllers/Application

    package controllers
    import application.FileCheck.{ f_RowCountDf, f_RowFullDf}
    import javax.inject.Inject
    import play.api.mvc._
    import play.api.data._
    import play.api.data.Forms._
    /**
     * Controller that serves the application-selection form and the pages
     * showing the output of the Spark helpers in `application.FileCheck`.
     */
    class Application @Inject() (mcc: MessagesControllerComponents) extends MessagesAbstractController(mcc) {

      /** Input file handed to both Spark helpers. */
      private val DataPath = "data/data.csv"

      //TODO: remove
      val fooForm = Form(single("foo" -> text(maxLength = 20)))

      /** Renders the home page with the application-selection form. */
      def index: Action[AnyContent] = Action { implicit request => Ok(views.html.index(fooForm)) }

      /** Payload of the selection form: the id of the chosen application. */
      case class AppForm(appid: String)

      /** Binds the `appid` request parameter to an [[AppForm]]. */
      val userForm = Form(
        mapping(
          "appid" -> text
        )(AppForm.apply)(AppForm.unapply)
      )

      /**
       * Shows the "execute" page for the application chosen on the home page.
       *
       * Responds with 400 and the home page when the request cannot be bound
       * (for example when `appid` is missing) instead of failing with an
       * exception from calling `.get` on an unbound form.
       */
      def run: Action[AnyContent] = Action { implicit request: MessagesRequest[AnyContent] =>
        userForm.bindFromRequest.fold(
          _ => BadRequest(views.html.index(fooForm)),
          bound => Ok(views.html.execute(bound.appid))
        )
      }

      /** Renders the value returned by `f_RowCountDf` for the data file. */
      def result_count: Action[AnyContent] = Action {
        val rowCount: Int = f_RowCountDf(DataPath)
        Ok(views.html.result_count("Count of Rows", rowCount))
      }

      /** Renders the list returned by `f_RowFullDf` for the data file. */
      def result_full: Action[AnyContent] = Action {
        val rows: List[String] = f_RowFullDf(DataPath)
        Ok(views.html.result_full("All Rows", rows))
      }
    }
    

    views/index.scala.html

    @* Home page: lets the user pick which Spark application to run and submits the choice to Application.run. *@
    @(fooForm: Form[String])(implicit request: MessagesRequestHeader)
    @* Options of the select box: value submitted as "appid" -> label shown to the user. *@
    @apps = @{
        Seq("result_count" -> "(APP1) Spark application / Count of records / v.0.1 / updated: 30-07-2018",
            "result_full" -> "(APP3) Spark application / All records / v.1.0 / updated: 30-07-2018")
    }
    @main("Play-Spark", tab = "index") {
        <div class="header-with-logo">
            <div class="header-logo">
                <img src="@routes.Assets.at("images/horton.png")" width=100>
            </div>
            <div class="header-title">
                <h1 id="play-bootstrap">Play-Spark</h1>
                <p class="lead">Play Framework for Apache Spark</p>
            </div>
        </div>
        <p>
            This is a demo of <a href="http://www.playframework.com" target="_blank" rel="noopener">Play Framework</a>
            to execute <a href="https://spark.apache.org" target="_blank" rel="noopener">Apache-Spark</a> applications.
        </p>
        @* <hr> is not allowed inside <p>, so it follows the paragraph. *@
        <hr>
        @b4.vertical.form(routes.Application.run()) { implicit vfc =>
            @b4.select(fooForm("appid"), options = apps, '_label -> "Select Spark Application", '_custom -> true)
            @b4.free() {
                <button type="submit" class="btn btn-primary"><i class="fa fa-ok"></i>Next &gt;&gt;</button>
            }
        }
    }
    @footer("Play-Spark", tab = "index") {
    }
    

    views/execute.scala.html

    @* Confirmation page: shows a "Run" button that links to the route named by appid. *@
    @(appid: String)(implicit request: MessagesRequestHeader)
    @implicitFieldConstructor = @{
        b4.vertical.fieldConstructor()
    }
    @main("Play-Spark", tab = "vertical") {
        <div class="header-with-logo">
            <div class="header-logo">
                <img src="@routes.Assets.at("images/horton.png")" width=100>
            </div>
            <div class="header-title">
                <h1 id="play-bootstrap">Play-Spark</h1>
                <p class="lead">Play Framework for Apache Spark</p>
            </div>
        </div>
        <p>
            This is a demo of <a href="http://www.playframework.com" target="_blank" rel="noopener">Play Framework</a>
            to execute <a href="https://spark.apache.org" target="_blank" rel="noopener">Apache-Spark</a> applications.
        </p>
        @* <hr> is not allowed inside <p>, so it follows the paragraph. *@
        <hr>
        @* Host-relative link so the page works on any host/port, not only localhost:9000. *@
        <a href="/@appid" class="btn btn-primary" role="button">Run</a>
    }
    

    (image - "execute" page preview)

    86561-screen-shot-2018-08-17-at-101710.png

    application/FileCheck.scala

    package application
    import org.apache.spark.sql.DataFrame
    import scala.language.{ implicitConversions, postfixOps }
    /**
     * Spark helpers called by the web controller.
     *
     * The `spark` session used below is not defined in this object; it is
     * presumably supplied by `SparkCommons` (declared elsewhere) — confirm.
     *
     * NOTE(review): this object is used as a library, so `extends App` looks
     * unnecessary; it is kept here only to leave the existing `main` entry
     * point untouched.
     */
    object FileCheck extends App with SparkCommons {

      /**
       * Counts the data rows of a CSV file whose first line is a header.
       *
       * @param path location of the CSV file
       * @return number of rows; Spark reports a `Long`, which is narrowed to
       *         `Int` here, so counts above `Int.MaxValue` would not be
       *         represented correctly
       */
      def f_RowCountDf(path: String): Int = {
        val lines: DataFrame = spark
          .read
          .format("csv")
          .option("header", "true")
          .load(path)
        // The former debug `show` was dropped: it ran an additional Spark job on
        // every call and only printed to the console.
        lines.count().toInt
      }

      /**
       * Reads a CSV file with a header line and returns, for every row, the
       * `name` and `surname` columns joined by a single space.
       *
       * The file is read as "csv" with `header=true`: "tsv" is not a built-in
       * Spark data source, and without the header the `name` / `surname`
       * columns selected below would not exist.
       *
       * @param path location of the CSV file
       * @return one "name surname" string per row, in the order Spark returns them
       */
      def f_RowFullDf(path: String): List[String] = {
        import spark.implicits._
        import org.apache.spark.sql.functions.{ concat, lit }
        val lines: DataFrame = spark
          .read
          .format("csv")
          .option("header", "true")
          .load(path)
        // Alias the computed column directly instead of renaming it afterwards by
        // its auto-generated name, which depends on Spark's expression formatting.
        val fullNames: DataFrame = lines.select(concat($"name", lit(" "), $"surname").as("name"))
        fullNames.map(row => row.getString(0)).collect().toList
      }
    }
    

    views/result_count.scala.html

    @* Result page: shows the row count passed in as message, with a link back to the home page. *@
    @(title: String, message: Int)
    @main("Play-Spark", tab = "vertical") {
    @* NOTE(review): the other views using main do not emit their own DOCTYPE/html/head/body,
       so this nested document is probably redundant; confirm against main.scala.html. *@
    <!DOCTYPE html>
    <html lang="en">
        <head>
            <title>@title</title>
        </head>
        <body>
            <div class="header-with-logo">
                <div class="header-logo">
                    <img src="@routes.Assets.at("images/horton.png")" width=100>
                </div>
                <div class="header-title">
                    <h1 id="play-bootstrap">Play-Spark</h1>
                    <p class="lead">Play Framework for Apache Spark</p>
                </div>
            </div>
            <div>
                <p>
                    This is a demo of <a href="http://www.playframework.com" target="_blank" rel="noopener">Play Framework</a>
                    to execute <a href="https://spark.apache.org" target="_blank" rel="noopener">Apache-Spark</a> applications.
                </p>
                <hr>
                <p>
                    Rowcount: @message<br>
                    @* Reverse route instead of a hard-coded http://localhost:9000 URL. *@
                    <a href="@routes.Application.index()">Back</a>
                </p>
            </div>
        </body>
    </html>
    }
    @footer("Play-Spark", tab = "index") {
    }
    

    (image - result_count page)

    86562-screen-shot-2018-08-17-at-101741.png

    (image - example of alternative output: table)

    86563-screen-shot-2018-08-17-at-102427.png


screen-shot-2018-08-17-at-101710.pngscreen-shot-2018-08-17-at-101741.pngscreen-shot-2018-08-16-at-155557.pngscreen-shot-2018-08-17-at-102253.pngscreen-shot-2018-08-17-at-102427.png
3,875 Views