<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Spark java.lang.StackOverflowError in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Spark-java-lang-StackOverflowError/m-p/226955#M188815</link>
    <description>&lt;PRE&gt;&amp;lt;br&amp;gt;&lt;/PRE&gt;&lt;P&gt;I try to build a model for movie lens rating data with Spark ALS. On Windows host, I use Spark 2.3.1. Data has just 100,000 rows and three columns; userid, movieid, and rating. My machine has Intel i7 and 32 GB memory. I have increased executor memory to 10 G. I get java.lang.StackOverflowError. My code is below:&lt;/P&gt;&lt;PRE&gt;object ErkansALS {
def main(args: Array[String]): Unit = {


val sparkConf = new SparkConf()
     .setMaster("local[4]")
     .setAppName("SparkALS")
     .setExecutorEnv("spark.driver.memory","8g")
     .setExecutorEnv("spark.executor.memory","10g")
     .setExecutorEnv("spark.sql.broadcastTimeout","1200")


val spark = SparkSession.builder()
    .config(sparkConf)
    .getOrCreate()
val movieRatings = spark.read.format("csv")
      .option("header","true")
      .option("inferSchema","true")
      .load("ratings.csv")
      .drop("timestamp")


val Array(training, test) = movieRatings.randomSplit(Array(0.8, 0.2),seed = 142)
training.cache()
val alsObject = new ALS()
      .setUserCol("userId")
      .setItemCol("movieId")
      .setRatingCol("rating")
      .setColdStartStrategy("drop")
      .setNonnegative(true)






val paramGridObject = new ParamGridBuilder()
      .addGrid(alsObject.rank, Array(12,14))
      .addGrid(alsObject.maxIter, Array(18,20))
      .addGrid(alsObject.regParam, Array(.17,.19))
      .build()




val evaluator = new RegressionEvaluator()
      .setMetricName("rmse")
      .setLabelCol("rating")
      .setPredictionCol("prediction")




val tvs = new TrainValidationSplit()
      .setEstimator(alsObject)
      .setEstimatorParamMaps(paramGridObject)
      .setEvaluator(evaluator)


val model = tvs.fit(training)




val bestModel = model.bestModel


val predictions = bestModel.transform(test)
val rmse = evaluator.evaluate(predictions)


predictions.show()
println("RMSE = ", rmse)
println("Best Model")


  }
}
&lt;/PRE&gt;&lt;P&gt;Errors are attached.&lt;/P&gt;&lt;P&gt;But when I try without TrainValidationSplit it works:&lt;/P&gt;&lt;PRE&gt;package spark.ml.recommendation.als&lt;BR /&gt;&lt;BR /&gt;import org.apache.spark.ml.evaluation.RegressionEvaluator&lt;BR /&gt;import org.apache.spark.ml.recommendation.ALS&lt;BR /&gt;import org.apache.spark.ml.tuning.{TrainValidationSplit, ParamGridBuilder}&lt;BR /&gt;import org.apache.spark.sql.{SparkSession}&lt;BR /&gt;import org.apache.spark.{SparkConf, SparkContext}&lt;BR /&gt;&lt;BR /&gt;object ErkansALS {&lt;BR /&gt;  def main(args: Array[String]): Unit = {&lt;BR /&gt;/*   val sparkConf = new SparkConf()     .setExecutorEnv("spark.driver.memory","4g")     .setExecutorEnv("spark.executor.memory","8g")     .setExecutorEnv("spark.sql.broadcastTimeout","1200")     .setExecutorEnv("spark.eventLog.enabled","false")*/val spark = SparkSession.builder()&lt;BR /&gt;    .master("local[*]")&lt;BR /&gt;    .appName("SparkALS")&lt;BR /&gt;    .getOrCreate()&lt;BR /&gt;&lt;BR /&gt;    val movieRatings = spark.read.format("csv")&lt;BR /&gt;      .option("header","true")&lt;BR /&gt;      .option("inferSchema","true")&lt;BR /&gt;      .load("C:\\Users\\toshiba\\SkyDrive\\veribilimi.co\\Datasets\\ml-latest-small\\ratings.csv")&lt;BR /&gt;      .drop("timestamp")&lt;BR /&gt;     // .sample(0.1,142)movieRatings.show()&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt; println(movieRatings.count())&lt;BR /&gt;&lt;BR /&gt;// 100.004 adet rating var.    
// Create training and test setval Array(training, test) = movieRatings.randomSplit(Array(0.8, 0.2),seed = 142)&lt;BR /&gt;training.cache()&lt;BR /&gt;&lt;BR /&gt;    // Create ALS modelval alsObject = new ALS()&lt;BR /&gt;      .setUserCol("userId")&lt;BR /&gt;      .setItemCol("movieId")&lt;BR /&gt;      .setRatingCol("rating")&lt;BR /&gt;      .setColdStartStrategy("drop")&lt;BR /&gt;      .setNonnegative(true)&lt;BR /&gt;&lt;BR /&gt;/*    // Tune model using ParamGridBuilder    val paramGridObject = new ParamGridBuilder()      .addGrid(alsObject.rank, Array(14))      .addGrid(alsObject.maxIter, Array(20))      .addGrid(alsObject.regParam, Array(.19))      .build()*/    // Define evaluator as RMSEval evaluator = new RegressionEvaluator()&lt;BR /&gt;      .setMetricName("rmse")&lt;BR /&gt;      .setLabelCol("rating")&lt;BR /&gt;      .setPredictionCol("prediction")&lt;BR /&gt;/*    // Build cross validation using TrainValidationSplit    val tvs = new TrainValidationSplit()      .setEstimator(alsObject)      .setEstimatorParamMaps(paramGridObject)      .setEvaluator(evaluator)*/    // Fit ALS model to training setval model = alsObject.fit(training)&lt;BR /&gt;/*        // Take best model        val bestModel = model.bestModel*/        // Generate predictions and evaluate RMSEval predictions = model.transform(test)&lt;BR /&gt;        val rmse = evaluator.evaluate(predictions)&lt;BR /&gt;&lt;BR /&gt;        predictions.show()&lt;BR /&gt;&lt;BR /&gt;        // Print evaluation metrics and model parametersprintln("RMSE = ", rmse)&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt;  }&lt;BR /&gt;}&lt;/PRE&gt;</description>
    <pubDate>Sun, 26 Aug 2018 19:31:45 GMT</pubDate>
    <dc:creator>erkansirin78</dc:creator>
    <dc:date>2018-08-26T19:31:45Z</dc:date>
  </channel>
</rss>

