Created 12-29-2016 05:49 AM
Even though I have already imported everything needed to use RandomForestClassifier with a weightCol parameter, I still get the following error: value weightCol is not a member of org.apache.spark.ml.classification.RandomForestClassifier. I'm currently using Spark 1.6.1.
Here is my code:
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.{RandomForestClassificationModel, RandomForestClassifier}
import org.apache.spark.ml.evaluation.{BinaryClassificationEvaluator, MulticlassClassificationEvaluator}
import org.apache.spark.ml.feature.{IndexToString, StringIndexer, VectorAssembler, VectorIndexer}
import org.apache.spark.ml.param.shared.HasWeightCol
import org.apache.spark.mllib.evaluation.MulticlassMetrics
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.sql.Row
import org.apache.spark.sql.functions._
import org.apache.spark.sql.functions.{unix_timestamp, from_unixtime, to_date}

val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)
import sqlContext.implicits._

val raw = sqlContext.sql("SELECT * FROM fraudegt.sample_cdr_train_v2")
val mod = raw.withColumn("id", raw("id").cast("string"))
val mod1 = mod.na.fill(0)

val assembler = new VectorAssembler()
  .setInputCols(Array("hora_del_dia", "dia_mes", "duracion", "duracion_dia", "duracion_24h",
    "avg_duracion_dia", "avg_duracion_24h", "avg_duracion_historica",
    "celdas_iniciales_distintas_dia", "celdas_iniciales_distintas_historico",
    "celdas_finales_distintas_dia", "celdas_finales_distintas_historico",
    "pmc_dia", "pmc_historico", "imcd_dia", "imcd_historico", "llamadas_en_dia"))
  .setOutputCol("features")
val df_all = assembler.transform(mod1)

val labelIndexer = new StringIndexer().setInputCol("fraude").setOutputCol("label")
val df = labelIndexer.fit(df_all).transform(df_all)

val splits = df.randomSplit(Array(0.7, 0.3))
val (trainingData, testData) = (splits(0), splits(1))

val classifier = new RandomForestClassifier()
  .setImpurity("gini")
  .setMaxDepth(4)
  .setNumTrees(100)
  .setFeatureSubsetStrategy("auto")
  .setSeed(5043)

val model = classifier.fit(trainingData)

// This is the line that fails to compile:
val model2 = classifier.fit(trainingData, classifier.weightCol -> "weight")
Created 12-29-2016 03:00 PM
weightCol is not part of RandomForestClassifier in Spark 1.6.1; it is part of LogisticRegression.
You can see this in the Spark 1.6.1 Python API source (http://spark.apache.org/docs/1.6.1/api/python/_modules/pyspark/ml/classification.html): RandomForestClassifier does not mix in HasWeightCol, while LogisticRegression does.

@inherit_doc
class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
                             HasSeed, HasRawPredictionCol, HasProbabilityCol,
                             RandomForestParams, TreeClassifierParams, HasCheckpointInterval):

@inherit_doc
class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
                         HasMaxIter, HasRegParam, HasTol, HasProbabilityCol,
                         HasRawPredictionCol, HasElasticNetParam, HasFitIntercept,
                         HasStandardization, HasThresholds, HasWeightCol):
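If instance weights are the goal on 1.6.1, one option is to switch to LogisticRegression, which does expose setWeightCol. Below is a minimal sketch, reusing trainingData and the sqlContext.implicits._ import from the code above; the "weight" column is hypothetical and is built here as a simple class-balancing weight:

import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.sql.functions.when

// Fraction of positive (fraud) rows in the training set.
val posFraction = trainingData.filter($"label" === 1.0).count.toDouble /
  trainingData.count.toDouble

// Up-weight the rarer class: positives get (1 - posFraction), negatives get posFraction.
val weighted = trainingData.withColumn("weight",
  when($"label" === 1.0, 1.0 - posFraction).otherwise(posFraction))

val lr = new LogisticRegression()
  .setFeaturesCol("features")
  .setLabelCol("label")
  .setWeightCol("weight") // supported by LogisticRegression in 1.6.x

val lrModel = lr.fit(weighted)

If you need to stay with RandomForestClassifier, you can approximate weighting by resampling, for example down-sampling the majority class with DataFrame.sample before fitting; weightCol support for the tree-ensemble classifiers only arrived in later Spark releases.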