Spark 方式 WordCount，两个版本（Scala 与 Java）：
- pom.xml
- Scala 版本
- Java 版本
- MapReduce 版本（未完待续）
pom.xml（提取时丢失了 XML 标签，按原有值重建如下）:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>org.example</groupId>
  <artifactId>bigDataLogTest_new</artifactId>
  <version>1.0-SNAPSHOT</version>
  <name>bigDataLogTest_new</name>
  <url>http://www.example.com</url>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>
  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.11</artifactId>
      <version>1.4.0</version>
    </dependency>
    <dependency>
      <groupId>log4j</groupId>
      <artifactId>log4j</artifactId>
      <version>1.2.17</version>
    </dependency>
  </dependencies>
</project>
scala
import org.apache.spark.{SparkConf, SparkContext}
/** Spark word count, Scala version.
  *
  * Reads the project's pom.xml line by line, splits each line on single
  * spaces, and writes (word, count) pairs to an output directory.
  */
object current_01 {
  def main(args: Array[String]): Unit = {
    // Local single-threaded Spark; "My App" is the name shown in the Spark UI.
    val conf = new SparkConf().setMaster("local").setAppName("My App")
    val sc = new SparkContext(conf)
    try {
      // Forward slashes (valid on Windows too) replace the original
      // backslash path, whose "\J", "\b", "\p" escapes do not compile.
      val input = sc.textFile("D:/Java_code/bigDataLogTest_new/pom.xml")
      val words = input.flatMap(line => line.split(" "))
      val counts = words.map(word => (word, 1)).reduceByKey(_ + _)
      // saveAsTextFile expects an output *directory* (part-* files are
      // created inside it) and fails if the directory already exists.
      counts.saveAsTextFile("D:/Java_code/bigDataLogTest_new/data/text.txt")
    } finally {
      sc.stop() // always release the local executor, even on failure
    }
  }
}
java
import java.util.*;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;

import scala.Tuple2;
public class wordCount {
public static void main(String[] args) {
SparkConf conf = new SparkConf().setMaster("local").setAppName("wordCount");
JavaSparkContext jsc = new JavaSparkContext(conf);
JavaRDD inputRDD = jsc.textFile("D:\Java_code\bigDataLogTest_new\pom.xml");
inputRDD.flatMap(new FlatMapFunction() {
@Override
public Iterable call(String s) throws Exception {
return new ArrayList(Arrays.asList(s.split(" ")));
}
}).distinct()
.map(new Function>() {
@Override
public Tuple2 call(String v1) throws Exception {
return new Tuple2<>(v1,1);
}
}).foreach(new VoidFunction>() {
@Override
public void call(Tuple2 stringIntegerTuple2) throws Exception {
System.out.printf(String.valueOf(stringIntegerTuple2) + "n");
}
});
}
}
mapreduce(未完待续)