https://www.bilibili.com/video/BV1Xz4y1m7cv?p=60
需求：使用 Spark SQL 的 SQL 和 DSL 两种方式完成 WordCount。
代码实现：

package sql
import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}

/**
 * WordCount implemented two ways with Spark SQL over the same data:
 * a raw SQL query against a temp view, and the typed DSL API.
 */
object Demo05_WordCount {
  def main(args: Array[String]): Unit = {
    // TODO 0. Prepare the environment
    val spark = SparkSession.builder().appName("sparksql").master("local[*]").getOrCreate()
    val sc = spark.sparkContext
    sc.setLogLevel("WARN")
    import spark.implicits._

    // TODO 1. Load the data
    // read.text gives an untyped DataFrame (Dataset[Row]);
    // read.textFile gives a typed Dataset[String].
    val df: DataFrame = spark.read.text("data/person.txt")
    val ds: Dataset[String] = spark.read.textFile("data/person.txt")
    df.printSchema()
    df.show()
    ds.printSchema()
    ds.show()

    // TODO 2. Process the data
    // df.flatMap(_.split(" ")) // NOTE: df is untyped (Row), so split is not available directly
    val words: Dataset[String] = ds.flatMap(_.split(" "))
    words.printSchema()
    words.show()

    // WordCount
    // TODO ===== SQL =====
    // Session-scoped temp view so the SQL below can reference plain `t_words`.
    // (createOrReplaceGlobalTempView would register it under the `global_temp`
    // database, and the unqualified query would fail with "table not found".)
    words.createOrReplaceTempView("t_words")
    val sql: String =
      """
        |select value,count(*) as counts
        |from t_words
        |group by value
        |order by counts desc
      """.stripMargin
    spark.sql(sql).show

    // TODO ===== DSL =====
    // groupBy + count produces a column named "count"; sort on it descending.
    words.groupBy('value)
      .count()
      .orderBy('count.desc)
      .show()

    // TODO 3. Output the data (shown above via show())
    // TODO 4. Release resources
    spark.close()
  }

  /** Sample record type; unused by this demo but kept for the surrounding lesson. */
  case class Person(id: Int, name: String, age: Int)
}
演示



