IDEA软件scala版本2.12.11
pom.xml
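<dependencies>
  <dependency>
    <groupId>org.scala-lang</groupId>
    <artifactId>scala-library</artifactId>
    <version>2.12.4</version>
  </dependency>
  <dependency>
    <groupId>org.scala-lang</groupId>
    <artifactId>scala-compiler</artifactId>
    <version>2.12.4</version>
  </dependency>
  <dependency>
    <groupId>org.scala-lang</groupId>
    <artifactId>scala-reflect</artifactId>
    <version>2.12.4</version>
  </dependency>
  <dependency>
    <groupId>log4j</groupId>
    <artifactId>log4j</artifactId>
    <version>1.2.12</version>
  </dependency>
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.12</artifactId>
    <version>3.0.0</version>
  </dependency>
</dependencies>
<build>
  <plugins>
    <plugin>
      <groupId>org.scala-tools</groupId>
      <artifactId>maven-scala-plugin</artifactId>
      <version>2.15.2</version>
      <executions>
        <execution>
          <goals>
            <goal>compile</goal>
            <goal>testCompile</goal>
          </goals>
        </execution>
      </executions>
    </plugin>
  </plugins>
</build>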
代码:
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Spark exercise: reads comma-separated score records from `data/a.txt` and
 * prints several per-student statistics.
 *
 * Every non-blank line is split on "," and its first four fields are used.
 * The first field is used as the student identifier and the next three are
 * parsed as integer scores. The printed section headers treat the first score
 * as the math score -- presumably matching the column order of the data file;
 * verify against the actual data.
 */
object Test01 {

  /** One parsed record: identifier followed by three scores, still as text. */
  private type Row = (String, String, String, String)

  /**
   * Sum of the three score fields of a record.
   *
   * @param row parsed record
   * @return the three scores added together
   * @throws NumberFormatException if any score field is not an integer
   */
  private def total(row: Row): Int = row._2.toInt + row._3.toInt + row._4.toInt

  /**
   * Program entry point; runs every statistic in sequence and prints the results.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Local import: only needed to iterate the accumulator's java.util.List.
    import scala.collection.JavaConverters._

    val conf = new SparkConf().setAppName("one").setMaster("local[*]")
    val context = new SparkContext(conf)
    try {
      val list = context
        .textFile("data/a.txt")
        // A blank line (e.g. a trailing newline) would otherwise fail on str(1).
        .filter(_.trim.nonEmpty)
        .map { line =>
          val str = line.split(",")
          (str(0), str(1), str(2), str(3))
        }
        // Reused by five separate jobs below; avoid re-reading and re-parsing.
        .cache()

      println("求每个学生的总成绩")
      list.map(row => (row._1, total(row))).foreach(println)

      println("求每个学生的平均成绩")
      // Divide by 3.0: integer division would silently truncate the average.
      list.map(row => (row._1, total(row) / 3.0)).foreach(println)

      println("求数学第一名的学生的各门成绩")
      // take(1) on the sorted RDD fetches only the top row instead of
      // collecting the whole data set to the driver first.
      list
        .map(row => (row._1, row._2.toInt, row._3, row._4))
        .sortBy(_._2, ascending = false)
        .take(1)
        .foreach(println)

      println("求总分第一名的学生的各们成绩")
      list
        .map(row => (row._1, row._2, row._3, row._4, total(row)))
        .sortBy(_._5, ascending = false)
        .take(1)
        .foreach(println)

      println("使用累加器求每个学生的总成绩,不使用累加器不得分")
      // Explicit element type: without it the accumulator is inferred as
      // CollectionAccumulator[Nothing] and nothing can be added to it.
      val leijia = context.collectionAccumulator[(String, Int)]("leijia")
      // Updates happen inside an action (foreach), so each record is added once.
      list.foreach(row => leijia.add((row._1, total(row))))
      // The merged value is read back on the driver after the action finishes.
      leijia.value.asScala.foreach(println)
    } finally {
      // Always release the context, even if one of the jobs above throws.
      context.stop()
    }
  }
}



