// 需要数据集可以给我留言 (Leave me a message if you need the dataset.)
object linner {
/**
 * Entry point: fits a linear regression of `price` on `square` using the rows of
 * `house.csv` and prints the model's predictions for the held-out rows.
 *
 * Pipeline: read the semicolon-separated CSV, parse both columns to `Double`, attach a
 * random key and sort by it, assemble `square` into a `features` vector, split 80/20
 * with a fixed seed, fit on the training part and show predictions for the test part.
 *
 * The Spark session is stopped in a `finally` block, so it is released even when
 * reading, parsing, fitting or displaying fails.
 *
 * @param args command-line arguments; not used
 */
def main(args: Array[String]): Unit = {
  val conf = new SparkConf().setMaster("local[*]").setAppName("linner")
  val spark = SparkSession.builder().config(conf).getOrCreate()
  try {
    val file = spark.read
      .format("csv")
      .option("sep", ";")
      .option("header", "true")
      .load("house.csv")
    import spark.implicits._

    // Random number generator used to attach a sort key to every row.
    // It is unseeded, so the row order differs from run to run.
    val random = new util.Random()

    // Both columns are read as strings and converted with toDouble, which throws on a
    // value that is not a valid number (the failure then propagates out of main).
    val data = file
      .select("square", "price")
      .map(rows => (rows.getAs[String](0).toDouble, rows.getString(1).toDouble, random.nextDouble()))
      .toDF("square", "price", "random")
      .sort("random")

    // Pack the single input column into the vector column expected by the estimator.
    val assembler = new VectorAssembler()
      .setInputCols(Array("square"))
      .setOutputCol("features")
    val frame = assembler.transform(data)

    // Split the data set into two parts: 80% for training, 20% for testing (seed 1L).
    val Array(train, test) = frame.randomSplit(Array(0.8, 0.2), 1L)

    // Create the linear regression estimator.
    val regression = new LinearRegression()
      .setMaxIter(10) // maximum number of training iterations
      .setRegParam(0.3) // regularization strength
      .setElasticNetParam(0.8) // elastic-net mixing parameter (value recommended by the original author)

    // "features" is the feature vector column, "price" is the label column.
    val model = regression.setLabelCol("price").setFeaturesCol("features").fit(train)
    model.transform(test).show()
  } finally {
    // Always release the session, including on failure paths.
    spark.stop()
  }
}



