<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question In spark shell, while using weightCol in RandomForestClassifier throws error: value weightCol is not a member of org.apache.spark.ml.classification.RandomForestClassifier in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/In-spark-shell-while-using-weightCol-in/m-p/174117#M50376</link>
    <description>&lt;P&gt;Even though I have already imported all the necessary libraries for using RandomForestClassifier with weightCol parameter, I still get the following error: value weightCol is not a member of org.apache.spark.ml.classification.RandomForestClassifier. I'm currently using Spark 1.6.1. &lt;/P&gt;&lt;P&gt;Here is my code: &lt;/P&gt;&lt;PRE&gt;import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.{RandomForestClassificationModel, RandomForestClassifier}
import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator
import org.apache.spark.ml.feature.{IndexToString, StringIndexer, VectorIndexer}
import org.apache.spark.ml.classification.RandomForestClassifier
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.ml.feature.StringIndexer
import org.apache.spark.ml.feature.VectorAssembler
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
import sqlContext.implicits._
import org.apache.spark.sql.functions._
import org.apache.spark.sql.functions.{unix_timestamp, from_unixtime, to_date}
val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)
import sqlContext.implicits._
import org.apache.spark.sql.functions._
import org.apache.spark.sql.functions.{unix_timestamp, from_unixtime, to_date}
import org.apache.spark.mllib.evaluation.MulticlassMetrics
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.sql.Row
import org.apache.spark.ml.param.shared.HasWeightCol
val raw = sqlContext.sql("SELECT * FROM fraudegt.sample_cdr_train_v2")
val mod = raw.withColumn("id", raw("id").cast("string"))
val mod1 = mod.na.fill(0)
val assembler = new VectorAssembler().setInputCols(Array("hora_del_dia","dia_mes","duracion","duracion_dia","duracion_24h","avg_duracion_dia","avg_duracion_24h","avg_duracion_historica","celdas_iniciales_distintas_dia","celdas_iniciales_distintas_historico","celdas_finales_distintas_dia","celdas_finales_distintas_historico","pmc_dia","pmc_historico","imcd_dia","imcd_historico","llamadas_en_dia")).setOutputCol("features")
val df_all = assembler.transform(mod1)
val labelIndexer = new StringIndexer().setInputCol("fraude").setOutputCol("label")
val df = labelIndexer.fit(df_all).transform(df_all)
val splits = df.randomSplit(Array(0.7, 0.3))
val (trainingData, testData) = (splits(0), splits(1))
val classifier = new RandomForestClassifier().setImpurity("gini").setMaxDepth(4).setNumTrees(100).setFeatureSubsetStrategy("auto").setSeed(5043)
val model = classifier.fit(trainingData)
val model2 = classifier.fit(trainingData, classifier.weightCol-&amp;gt;"weight")&lt;/PRE&gt;</description>
    <pubDate>Thu, 29 Dec 2016 13:49:12 GMT</pubDate>
    <dc:creator>jmbohl</dc:creator>
    <dc:date>2016-12-29T13:49:12Z</dc:date>
    <item>
      <title>In spark shell, while using weightCol in RandomForestClassifier throws error: value weightCol is not a member of org.apache.spark.ml.classification.RandomForestClassifier</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/In-spark-shell-while-using-weightCol-in/m-p/174117#M50376</link>
      <description>&lt;P&gt;Even though I have already imported all the necessary libraries for using RandomForestClassifier with weightCol parameter, I still get the following error: value weightCol is not a member of org.apache.spark.ml.classification.RandomForestClassifier. I'm currently using Spark 1.6.1. &lt;/P&gt;&lt;P&gt;Here is my code: &lt;/P&gt;&lt;PRE&gt;import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.{RandomForestClassificationModel, RandomForestClassifier}
import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator
import org.apache.spark.ml.feature.{IndexToString, StringIndexer, VectorIndexer}
import org.apache.spark.ml.classification.RandomForestClassifier
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.ml.feature.StringIndexer
import org.apache.spark.ml.feature.VectorAssembler
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
import sqlContext.implicits._
import org.apache.spark.sql.functions._
import org.apache.spark.sql.functions.{unix_timestamp, from_unixtime, to_date}
val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)
import sqlContext.implicits._
import org.apache.spark.sql.functions._
import org.apache.spark.sql.functions.{unix_timestamp, from_unixtime, to_date}
import org.apache.spark.mllib.evaluation.MulticlassMetrics
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.sql.Row
import org.apache.spark.ml.param.shared.HasWeightCol
val raw = sqlContext.sql("SELECT * FROM fraudegt.sample_cdr_train_v2")
val mod = raw.withColumn("id", raw("id").cast("string"))
val mod1 = mod.na.fill(0)
val assembler = new VectorAssembler().setInputCols(Array("hora_del_dia","dia_mes","duracion","duracion_dia","duracion_24h","avg_duracion_dia","avg_duracion_24h","avg_duracion_historica","celdas_iniciales_distintas_dia","celdas_iniciales_distintas_historico","celdas_finales_distintas_dia","celdas_finales_distintas_historico","pmc_dia","pmc_historico","imcd_dia","imcd_historico","llamadas_en_dia")).setOutputCol("features")
val df_all = assembler.transform(mod1)
val labelIndexer = new StringIndexer().setInputCol("fraude").setOutputCol("label")
val df = labelIndexer.fit(df_all).transform(df_all)
val splits = df.randomSplit(Array(0.7, 0.3))
val (trainingData, testData) = (splits(0), splits(1))
val classifier = new RandomForestClassifier().setImpurity("gini").setMaxDepth(4).setNumTrees(100).setFeatureSubsetStrategy("auto").setSeed(5043)
val model = classifier.fit(trainingData)
val model2 = classifier.fit(trainingData, classifier.weightCol-&amp;gt;"weight")&lt;/PRE&gt;</description>
      <pubDate>Thu, 29 Dec 2016 13:49:12 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/In-spark-shell-while-using-weightCol-in/m-p/174117#M50376</guid>
      <dc:creator>jmbohl</dc:creator>
      <dc:date>2016-12-29T13:49:12Z</dc:date>
    </item>
    <item>
      <title>Re: In spark shell, while using weightCol in RandomForestClassifier throws error: value weightCol is not a member of org.apache.spark.ml.classification.RandomForestClassifier</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/In-spark-shell-while-using-weightCol-in/m-p/174118#M50377</link>
      <description>&lt;P&gt;weightCol is not part of RandomForestClassifier; it's part of LogisticRegression.&lt;/P&gt;&lt;PRE&gt;@inherit_doc
&lt;A href="http://spark.apache.org/docs/1.6.1/api/python/pyspark.ml.html#pyspark.ml.classification.RandomForestClassifier"&gt;[docs]&lt;/A&gt;class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasSeed,
                             HasRawPredictionCol, HasProbabilityCol,
                             RandomForestParams, TreeClassifierParams, HasCheckpointInterval):&lt;/PRE&gt;&lt;PRE&gt;@inherit_doc
&lt;A href="http://spark.apache.org/docs/1.6.1/api/python/pyspark.ml.html#pyspark.ml.classification.LogisticRegression"&gt;[docs]&lt;/A&gt;class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter,
                         HasRegParam, HasTol, HasProbabilityCol, HasRawPredictionCol,
                         HasElasticNetParam, HasFitIntercept, HasStandardization, HasThresholds,
                         HasWeightCol):
    """
&lt;A href="http://spark.apache.org/docs/1.6.1/api/python/_modules/pyspark/ml/classification.html" target="_blank"&gt;http://spark.apache.org/docs/1.6.1/api/python/_modules/pyspark/ml/classification.html&lt;/A&gt;

&lt;/PRE&gt;</description>
      <pubDate>Thu, 29 Dec 2016 23:00:46 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/In-spark-shell-while-using-weightCol-in/m-p/174118#M50377</guid>
      <dc:creator>rkandula</dc:creator>
      <dc:date>2016-12-29T23:00:46Z</dc:date>
    </item>
  </channel>
</rss>

