Spark 上的词频统计（word count implemented with Spark RDDs, run in local mode）
package com.njbdqn
import org.apache.spark.{SparkConf, SparkContext}
/** Word count on Spark: read a text file, split each line on spaces,
  * count every word with reduceByKey, and print the (word, count) pairs.
  */
object WordCount {
  def main(args: Array[String]): Unit = {
    // local[*] = run locally using all available CPU cores
    val conf = new SparkConf().setMaster("local[*]").setAppName("wc")
    val sc = new SparkContext(conf)
    try {
      // BUGFIX: the original path used raw backslashes ("D:\ChangZhi\...") inside a
      // plain string literal; "\C", "\m", "\s", "\d" are illegal escape sequences in
      // Scala and do not compile. Forward slashes are accepted by the JVM on Windows.
      val rdd = sc.textFile("D:/ChangZhi/myspark/src/main/resources/data.txt")
      rdd.flatMap(_.split(" "))   // one element per word
        .map((_, 1))              // pair each word with an initial count of 1
        .reduceByKey(_ + _)       // sum the counts per word
        .foreach(println)
    } finally {
      sc.stop() // release the SparkContext even if the job throws
    }
  }
}
纯 Scala 词频统计（word count in plain Scala, no Spark required）
package com.njbdqn
import org.apache.spark.{SparkConf, SparkContext}
import scala.io.Source
/** Word count in plain Scala: read a file, split its contents on spaces,
  * group equal words, and print (word, occurrenceCount) for each.
  */
object test {
  def main(args: Array[String]): Unit = {
    // BUGFIX: the original path used raw backslashes ("D:\ChangZhi\...") in a plain
    // string literal; "\C", "\m", "\s", "\d" are illegal escape sequences in Scala
    // and do not compile. Forward slashes work on Windows JVMs too.
    val src = Source.fromFile("D:/ChangZhi/myspark/src/main/resources/data.txt")
    try {
      src.getLines()
        .mkString(" ")            // join all lines into one space-separated string
        .split(" ")               // one element per word
        .groupBy(identity)        // word -> all its occurrences
        .foreach { case (word, occurrences) => println((word, occurrences.length)) }
    } finally {
      src.close() // BUGFIX: the original never closed the Source, leaking the file handle
    }
  }
}
Hive 词频统计（word count in HiveQL over table `wordcount`, column `line`）
select w.word, count(1) as cnt
from (select explode(split(line, ' ')) as word from wordcount) w  -- explode 把 wordcount 表的每行拆成多行，得到只有 word 字段的子查询 w
group by w.word
order by w.word;



