简介:本篇博客主要是用于 Spark 安装完成后进行测试的示例工程。
1、创建一个maven工程
详细结构如上图
2、打包给集群运行的配置(pom.xml):
<project>
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.lqs</groupId>
    <artifactId>WordCount</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.12</artifactId>
            <version>3.0.0</version>
        </dependency>
    </dependencies>
    <build>
        <finalName>WordCount</finalName>
        <plugins>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.4.6</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
3、本地运行 Spark 代码的配置(pom.xml,在上面基础上增加 maven-assembly-plugin 以打出包含依赖的 jar):
<project>
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.lqs</groupId>
    <artifactId>WordCount</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.12</artifactId>
            <version>3.0.0</version>
        </dependency>
    </dependencies>
    <build>
        <finalName>WordCount</finalName>
        <plugins>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.4.6</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.0.0</version>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
package com.lqs.spark
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
object WordCount {

  /**
   * Local smoke test for a Spark installation: counts the words found in
   * every file under the "input" directory and prints the totals.
   */
  def main(args: Array[String]): Unit = {
    // Spark configuration: app name plus a local master using all CPU cores.
    val sparkConf = new SparkConf()
      .setAppName("WordCount")
      .setMaster("local[*]")

    // SparkContext is the entry point for submitting a Spark application.
    val sc = new SparkContext(sparkConf)

    // Read the input files line by line.
    val lines: RDD[String] = sc.textFile("input")
    // Flatten each line into individual words (split on a single space).
    val words: RDD[String] = lines.flatMap(_.split(" "))
    // Pair every word with the count 1 ...
    val pairs: RDD[(String, Int)] = words.map((_, 1))
    // ... then sum the counts per word.
    val counts: RDD[(String, Int)] = pairs.reduceByKey(_ + _)

    // Collect the aggregated results to the driver and print them.
    val result: Array[(String, Int)] = counts.collect()
    result.foreach(println)

    // Single-line variant used when packaging for the cluster; keep the
    // other version commented out when switching to this one:
    // sc.textFile(args(0)).flatMap(_.split(" ")).map((_,1)).reduceByKey(_ + _).saveAsTextFile(args(1))

    // Optionally pause so the web UI at http://lqs:4040 stays reachable:
    // Thread.sleep(1000000)

    // Release the connection.
    sc.stop()
  }
}
4、集群运行打包代码
package com.lqs.spark
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
object WordCount {

  /**
   * Cluster entry point: counts the words in the files under args(0) and
   * writes the (word, count) results to args(1).
   *
   * @param args args(0) = input path, args(1) = output path
   */
  def main(args: Array[String]): Unit = {
    // Fail fast with a clear message instead of an
    // ArrayIndexOutOfBoundsException when the paths are missing.
    require(args.length >= 2, "Usage: WordCount <inputPath> <outputPath>")

    // Do NOT hard-code setMaster here: for cluster runs the master is
    // supplied by spark-submit (--master). Hard-coding "local[*]" would
    // force the job to run locally even when submitted to a cluster.
    val conf: SparkConf = new SparkConf().setAppName("WordCount")
    val context: SparkContext = new SparkContext(conf)

    try {
      // Read -> split into words -> pair with 1 -> sum per word -> save.
      context
        .textFile(args(0))
        .flatMap(_.split(" "))
        .map((_, 1))
        .reduceByKey(_ + _)
        .saveAsTextFile(args(1))
    } finally {
      // Always release cluster resources, even if the job fails.
      context.stop()
    }
  }
}



