package com.hj.spark;
import java.util.Arrays;
import java.util.Iterator;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import scala.Tuple2;
/**
 * Spark Streaming word count over a TCP text source.
 *
 * <p>Connects to host {@code "hadoop"} on port 9999, splits each received line on
 * single spaces, and prints per-word counts for every 1-second micro-batch.
 * Runs until externally terminated ({@code awaitTermination()} blocks forever).
 */
public class SparkStreaming {
    /**
     * Entry point: builds the streaming pipeline and runs it.
     *
     * @param args unused
     * @throws InterruptedException if the awaiting thread is interrupted
     */
    public static void main(String[] args) throws InterruptedException {
        // NOTE(review): app name "NetwordCount" looks like a typo for "NetworkCount";
        // kept as-is since it is a runtime string (visible in the Spark UI).
        // local[2]: at least 2 threads are required — one for the socket receiver,
        // one for batch processing.
        SparkConf conf = new SparkConf().setAppName("NetwordCount").setMaster("local[2]");
        // Streaming entry point with a 1-second batch interval.
        JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(1));
        // DStream receiving raw text lines from the TCP source (hostname, port).
        JavaReceiverInputDStream<String> lines = jssc.socketTextStream("hadoop", 9999);
        // Tokenize: split each line on single spaces into individual words.
        JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public Iterator<String> call(String line) throws Exception {
                return Arrays.asList(line.split(" ")).iterator();
            }
        });
        // Map each word to a (word, 1) pair for counting.
        JavaPairDStream<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {
            @Override
            public Tuple2<String, Integer> call(String word) throws Exception {
                return new Tuple2<>(word, 1);
            }
        });
        // Sum the 1s per word within each micro-batch.
        JavaPairDStream<String, Integer> wordCount = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer a, Integer b) throws Exception {
                return a + b;
            }
        });
        // Print the first few counts of each batch to stdout.
        wordCount.print();
        jssc.start();
        // Block the main thread so the streaming job keeps running.
        jssc.awaitTermination();
    }
}