Member since
11-13-2017
2
Posts
3
Kudos Received
1
Solution
My Accepted Solutions
Title | Views | Posted |
---|---|---|
8024 | 11-28-2018 03:05 PM |
08-17-2018
09:40 PM
1 Kudo
Short Description: An example Play Framework application that runs integrated Apache Spark applications and publishes their output directly to the web.
Motivation: There are few articles covering how Play and Apache Spark can be combined to publish output data straight to the web.
Used components:
Play is a high-productivity Java and Scala web application framework that integrates the components and APIs you need for modern web application development. Play is based on a lightweight, stateless, web-friendly architecture and features predictable and minimal resource consumption (CPU, memory, threads) for highly-scalable applications thanks to its reactive model, based on Akka Streams.
Link to PlayFramework (https://www.playframework.com)
Apache Spark is a fast and general processing engine compatible with Hadoop data. It can run in Hadoop clusters through YARN or Spark's standalone mode, and it can process data in HDFS, HBase, Cassandra, Hive, and any Hadoop Input Format. It is designed to perform both batch processing (similar to MapReduce) and new workloads like streaming, interactive queries, graph processing and machine learning.
Link to Apache-Spark (https://spark.apache.org)
Scala combines object-oriented and functional programming in one concise, high-level language. Scala's static types help avoid bugs in complex applications, and its JVM and JavaScript runtimes let you build high-performance systems with easy access to huge ecosystems of libraries.
Link to Scala (https://www.scala-lang.org)
Sbt is a build tool for Scala and Java.
Link to Sbt (https://www.scala-sbt.org)
Versions:
HDP 3.0.1, Apache Spark 2.3.1, Play Framework 2.6.X, Scala 2.11.8, Sbt 1.12
Architecture:
Code:
conf/routes
# Routes
# Home page
GET     /                   controllers.Application.index

# Links to controller
# (each route must be on its own line; "appid" values selected on the home
# page are used as the path of the application to run)
GET     /run                controllers.Application.run
GET     /result_count       controllers.Application.result_count
GET     /result_full        controllers.Application.result_full

# Map static resources from the /public folder to the /assets URL path
GET     /assets/*file       controllers.Assets.at(path="/public", file)
controllers/Application.scala
package controllers
import application.FileCheck.{ f_RowCountDf, f_RowFullDf}
import javax.inject.Inject
import play.api.mvc._
import play.api.data._
import play.api.data.Forms._
/**
 * Controller of the Play-Spark demo.
 *
 * Renders the application-selection form, a confirmation page for the selected
 * application, and the pages showing the output of the Spark helpers imported
 * from `application.FileCheck`.
 */
class Application @Inject() (mcc: MessagesControllerComponents) extends MessagesAbstractController(mcc) {

  /** Input file handed to both Spark helpers. */
  private val DataPath = "data/data.csv"

  //TODO: remove
  val fooForm = Form(single("foo" -> text(maxLength = 20)))

  /** Home page: shows the form used to pick a Spark application. */
  def index: Action[AnyContent] = Action { implicit request =>
    Ok(views.html.index(fooForm))
  }

  /** Payload of the selection form: the identifier of the chosen application. */
  case class AppForm(appid: String)

  val userForm = Form(
    mapping(
      "appid" -> text
    )(AppForm.apply)(AppForm.unapply)
  )

  /**
   * Binds the submitted `appid` and renders the confirmation page for it.
   *
   * A request without a valid `appid` is answered with 400 and the selection
   * form again; calling `.get` on a form with binding errors would throw instead.
   */
  def run: Action[AnyContent] = Action { implicit request: MessagesRequest[AnyContent] =>
    userForm.bindFromRequest.fold(
      _ => BadRequest(views.html.index(fooForm)),
      selection => Ok(views.html.execute(selection.appid))
    )
  }

  /** Runs the row-count helper on the data file and renders the result. */
  def result_count: Action[AnyContent] = Action {
    val rowCount: Int = f_RowCountDf(DataPath)
    Ok(views.html.result_count("Count of Rows", rowCount))
  }

  /** Runs the full-listing helper on the data file and renders the rows. */
  def result_full: Action[AnyContent] = Action {
    val rows: List[String] = f_RowFullDf(DataPath)
    Ok(views.html.result_full("All Rows", rows))
  }
}
views/tags/index.scala.html
@(fooForm: Form[String])(implicit request: MessagesRequestHeader)
@* Home page: lets the user pick one of the Spark applications and submits
   the choice (field "appid") to Application.run. *@
@apps = @{
  // value -> label pairs offered in the select box; the values match the
  // names of the result routes
  Seq(
    "result_count" -> "(APP1) Spark application / Count of records / v.0.1 / updated: 30-07-2018",
    "result_full" -> "(APP3) Spark application / All records / v.1.0 / updated: 30-07-2018"
  )
}
@main("Play-Spark", tab = "index") {
  <div class="header-with-logo">
    <div class="header-logo">
      <img src="@routes.Assets.at("images/horton.png")" width=100>
    </div>
    <div class="header-title">
      <h1 id="play-bootstrap">Play-Spark</h1>
      <p class="lead">Play Framework for Apache Spark</p>
    </div>
  </div>
  <p>
    This is a demo of <a href="http://www.playframework.com" target="_blank">Play Framework</a>
    to execute <a href="https://spark.apache.org" target="_blank">Apache-Spark</a> applications.
  </p>
  @* <hr> is not allowed inside <p>, so it follows the paragraph *@
  <hr>
  @b4.vertical.form(routes.Application.run()) { implicit vfc =>
    @b4.select(fooForm("appid"), options = apps, '_label -> "Select Spark Application", '_custom -> true)
    @b4.free() {
      <button type="submit" class="btn btn-primary"><i class="fa fa-ok"></i>Next >></button>
    }
  }
}
@footer("Play-Spark", tab = "index") {
}
views/tags/execute.scala.html
@(appid: String)(implicit request: MessagesRequestHeader)
@* Confirmation page: shows a "Run" button that links to the route named by
   the selected application id. *@
@implicitFieldConstructor = @{
  b4.vertical.fieldConstructor()
}
@main("Play-Spark", tab = "vertical") {
  <div class="header-with-logo">
    <div class="header-logo">
      <img src="@routes.Assets.at("images/horton.png")" width=100>
    </div>
    <div class="header-title">
      <h1 id="play-bootstrap">Play-Spark</h1>
      <p class="lead">Play Framework for Apache Spark</p>
    </div>
  </div>
  <p>
    This is a demo of <a href="http://www.playframework.com" target="_blank">Play Framework</a>
    to execute <a href="https://spark.apache.org" target="_blank">Apache-Spark</a> applications.
  </p>
  @* <hr> is not allowed inside <p>, so it follows the paragraph *@
  <hr>
  @* Host-relative link so the page also works when it is not served from
     localhost:9000; the id is URL-encoded because it comes from the request. *@
  <a href="/@helper.urlEncode(appid)" class="btn btn-primary" role="button">Run</a>
}
(image - "execute" page preview)
application/FileCheck.scala
package application
import org.apache.spark.sql.DataFrame
import scala.language.{ implicitConversions, postfixOps }
/**
 * Spark helpers called by the web controller.
 *
 * `spark` is not defined in this object; it is expected to come from
 * `SparkCommons` (not shown here).
 */
object FileCheck extends App with SparkCommons {

  /** Reads `path` as a CSV file whose first line holds the column names. */
  private def readCsv(path: String): DataFrame =
    spark
      .read
      .format("csv")
      .option("header", "true")
      .load(path)

  /**
   * Counts the data rows of the CSV file at `path`.
   *
   * @param path location of the CSV file (with header line)
   * @return number of rows; `count()` yields a `Long`, so a count above
   *         `Int.MaxValue` would not fit the declared return type
   */
  def f_RowCountDf(path: String): Int = {
    import spark.implicits._
    val rows = readCsv(path)
    // Debug output of the loaded rows on stdout.
    // NOTE(review): the tuple cast presumably requires exactly two columns — confirm.
    rows.as[(String, String)].show(false)
    rows.count().toInt
  }

  /**
   * Returns one "name surname" string per row of the CSV file at `path`.
   *
   * The file is read as CSV with a header line: the `name` and `surname`
   * columns selected below only exist when the header is parsed, and "tsv"
   * is not a built-in Spark data source.
   *
   * @param path location of the CSV file (with header line)
   * @return the concatenated values, collected to the driver
   */
  def f_RowFullDf(path: String): List[String] = {
    import spark.implicits._
    import org.apache.spark.sql.functions.{ concat, lit }
    readCsv(path)
      // alias the expression directly instead of renaming the generated column name
      .select(concat($"name", lit(" "), $"surname").as("name"))
      .map(row => row.getString(0))
      .collect()
      .toList
  }
}
views/result_count.scala.html
@(title: String, message: Int)
@* Result page of the row-count application: shows the count passed in as
   "message" and a link back to the home page. *@
@* NOTE(review): the other pages pass bare fragments to main, so main presumably
   already emits the <html>/<head>/<body> wrapper; if so, the nested document
   below should be reduced to its body content — confirm against main.scala.html. *@
@main("Play-Spark", tab = "vertical") {
  <!DOCTYPE html>
  <html lang="en">
    <head>
      <title>@title</title>
    </head>
    <body>
      <div class="header-with-logo">
        <div class="header-logo">
          <img src="@routes.Assets.at("images/horton.png")" width=100>
        </div>
        <div class="header-title">
          <h1 id="play-bootstrap">Play-Spark</h1>
          <p class="lead">Play Framework for Apache Spark</p>
        </div>
      </div>
      <div>
        <p>
          This is a demo of <a href="http://www.playframework.com" target="_blank">Play Framework</a>
          to execute <a href="https://spark.apache.org" target="_blank">Apache-Spark</a> applications.
        </p>
        <hr>
        <p>
          Rowcount: @message<br>
          @* reverse route instead of a hard-coded http://localhost:9000 *@
          <a href="@routes.Application.index()">Back</a>
        </p>
      </div>
    </body>
  </html>
}
@footer("Play-Spark", tab = "index") {
}
(image - result_count page)
(image - example of alternative output: table)
... View more