// RDD小案例 (a small RDD example)
/**
 * Minimal RDD example: builds an RDD from a local collection, keeps the
 * elements greater than 3, and prints the survivors on the driver.
 */
object RDD01 {

  /**
   * Program entry point. Runs the job on a local master using all cores.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("RDD01")
    val sc: SparkContext = new SparkContext(sparkConf)
    try {
      // An RDD can be created from an in-memory collection with either
      // parallelize or makeRDD; makeRDD is implemented on top of parallelize.
      sc.parallelize(List(1, 2, 3, 4))
        .filter(_ > 3)
        .collect() // bring the results back to the driver before printing
        .foreach(println)
    } finally {
      // Stop the context even when the job throws, so it is always released.
      sc.stop()
    }
  }
}
// broadcast小案例 (a small broadcast-variable example)
/**
 * Broadcast-variable example: multiplies each value of a keyed RDD by the
 * factor registered for the same key in a broadcast lookup table, drops the
 * records whose product is not positive (this includes keys that have no
 * factor, whose product defaults to 0), and prints the rest as `key:product`.
 */
object BroadCast01 {

  /**
   * Program entry point. Runs the job on a local master using all cores.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("broadcast01")
    val sc: SparkContext = new SparkContext(sparkConf)
    try {
      val rdd1 = sc.makeRDD(List(("a", 1), ("b", 2), ("c", 3), ("d", 4)))

      // Keyed lookup table. Built via toMap so that, should a key ever be
      // listed twice, the last entry wins -- the same outcome a sequential
      // scan that overwrites on every match would produce.
      val factors: Map[String, Int] = List(("a", 5), ("b", 6), ("c", 7)).toMap

      // Declare the broadcast variable.
      val broadcast: Broadcast[Map[String, Int]] = sc.broadcast(factors)

      rdd1
        .map { case (key, num) =>
          // Use the broadcast variable; a key without a factor yields 0.
          val product = broadcast.value.get(key).map(num * _).getOrElse(0)
          (key, product)
        }
        .filter { case (_, product) => product > 0 } // keep only value > 0
        .collect() // print on the driver, in RDD order, rather than inside the tasks
        .foreach { case (key, product) => println(s"$key:$product") }
    } finally {
      // Stop the context even when the job throws, so it is always released.
      sc.stop()
    }
  }
}



