-
Recent Posts
Archives
Categories
Meta
Category Archives: spark
Spark pipeline get best model
// Cross-validate a one-stage linear-regression pipeline over a small grid of
// regularisation strengths, then pull the winning fitted model back out.
val lr = new LinearRegression()

// The pipeline has a single stage, so the fitted regression sits at index 0.
val pipeline = new Pipeline()
  .setStages(Array(lr))

// Candidate values for lr.regParam that the cross-validator will try.
val paramGrid = new ParamGridBuilder()
  .addGrid(lr.regParam, Array(0, 0.5, 1.0))
  .build()

// 2-fold cross-validation scored with a default RegressionEvaluator.
val cv = new CrossValidator()
  .setEstimator(pipeline)
  .setEvaluator(new RegressionEvaluator)
  .setEstimatorParamMaps(paramGrid)
  .setNumFolds(2)

val cvModel = cv.fit(data)

// The estimator handed to the cross-validator was a Pipeline, so bestModel is
// cast to PipelineModel; its only stage is cast to the fitted regression model.
val model = cvModel.bestModel.asInstanceOf[PipelineModel]
val lrModel = model.stages(0).asInstanceOf[LinearRegressionModel]
Posted in spark
Leave a comment
Spark dataframe stats mean
// Extract the "mean" row of df.describe() as a Seq[Double].
//
// The code reads the statistic label from the "summary" column, keeps the row
// labelled "mean", drops that leading label cell, and parses every remaining
// cell via toString.toDouble.
//
// NOTE(review): a null cell in the mean row (presumably what describe() yields
// for non-numeric columns - confirm) would throw on toString; restrict df to
// numeric columns before calling this if that can occur.
df.describe()
  .rdd
  .filter(_.getAs[String]("summary") == "mean") // plain ASCII quotes: typographic quotes do not compile
  .first()
  .toSeq
  .drop(1) // skip the "summary" label cell itself
  .map(_.toString.toDouble)
Posted in spark
Leave a comment