目录:基础数据类型 | 自定义Sink
自定义Sink:Kafka | Redis | Elasticsearch | MySQL
基础数据类型
Flink 支持所有的 Java 和 Scala 基础数据类型,Int, Double, Long, String……
DataStreamnumberStream = env.fromElements(1, 2, 3, 4); numberStream.map(data -> data * 2);
Java 和 Scala 元组(Tuples)
DataStream> personStream = env.fromElements( new Tuple2("Adam", 17), new Tuple2("Sarah", 23) ); personStream.filter(p -> p.f1 > 18);
Scala 样例类(case classes)
/** A person record carrying a name and an age. */
case class Person(
  name: String,
  age: Int
)
// Build a stream from two Person records, then keep those older than 18.
val persons: DataStream[Person] =
  env.fromElements(Person("Adam", 17), Person("Sarah", 23))
persons.filter(_.age > 18)
跳转顶部
Java 简单对象(POJOs)
/**
 * Simple POJO describing a person.
 *
 * <p>The fields are public and a public no-argument constructor is provided,
 * which is the shape this example uses to illustrate a Java POJO.
 */
public class Person {

    /** The person's name; {@code null} when created through the no-arg constructor. */
    public String name;

    /** The person's age; {@code 0} when created through the no-arg constructor. */
    public int age;

    /**
     * Creates a person with the given name and age.
     *
     * @param name the person's name
     * @param age the person's age
     */
    public Person(String name, int age) {
        this.name = name;
        this.age = age;
    }

    /** Creates a person whose fields hold their default values. */
    public Person() {
        this(null, 0);
    }
}
// A stream whose element type is the Person POJO.
DataStream<Person> persons = env.fromElements(
        new Person("Alex", 42),
        new Person("Wendy", 23));
其它(Arrays, Lists, Maps, Enums, 等等)
Flink 对 Java 和 Scala 中的一些特殊目的的类型也都是支持的,比如 Java 的ArrayList,HashMap,Enum 等等。
自定义Sink
Flink 没有类似于 Spark 中 foreach 的方法让用户进行迭代操作,所有对外的输出操作都要利用 Sink 完成。最后通过类似如下的方式完成整个任务的最终输出操作。
// Attach a sink to the stream. MySink and xxxx are placeholders for a concrete
// sink class and its constructor arguments.
stream.addSink(new MySink(xxxx))
官方提供了一部分的框架的 sink。除此以外,需要用户自定义实现 sink。
Kafka
编写一个程序,先作为 Kafka 的消费者读取数据,处理之后再作为 Kafka 的生产者输出,也就是对从 Kafka 中传入的数据进行处理后再写回 Kafka。
package sink;
import beans.SenSorReading;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStreamSink;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011;
import java.util.Properties;
/**
 * Flink job that consumes comma-separated sensor lines from Kafka, converts each line
 * into the string form of a {@code SenSorReading}, and writes the result back to Kafka.
 */
public class SinkTest01 {

    /**
     * Builds and runs the Kafka-to-Kafka pipeline.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Kafka consumer configuration.
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "a:9092");
        properties.setProperty("group.id", "consumer-group");
        properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.setProperty("auto.offset.reset", "latest");

        // Typed source: without the String type argument the lambda below receives
        // Object and line.split(...) does not compile.
        DataStreamSource<String> inputStream = env.addSource(
                new FlinkKafkaConsumer011<String>("first", new SimpleStringSchema(), properties));

        // Each record is expected to look like "id,timestamp,temperature".
        SingleOutputStreamOperator<String> dataStream = inputStream.map(line -> {
            String[] fields = line.split(",");
            return new SenSorReading(fields[0], Long.valueOf(fields[1]), Double.valueOf(fields[2])).toString();
        });

        // NOTE(review): the sink writes to topic "first" on broker a:9092, which is the
        // same topic the source reads from, so this job will consume its own output.
        // Confirm whether a separate output topic was intended.
        dataStream.addSink(new FlinkKafkaProducer011<String>("a:9092", "first", new SimpleStringSchema()));

        env.execute();
    }
}
跳转顶部
Redis
需要引用依赖
<dependency>
    <groupId>org.apache.bahir</groupId>
    <artifactId>flink-connector-redis_2.11</artifactId>
    <version>1.0</version>
</dependency>
自定义Map类
public static class MyRedisMapper implements RedisMapper{ // 保存到 redis 的命令,存成哈希表 public RedisCommandDescription getCommandDescription() { return new RedisCommandDescription(RedisCommand.HSET, "sensor_tempe"); } public String getKeyFromData(SensorReading data) { return data.getId(); } public String getValueFromData(SensorReading data) { return data.getTemperature().toString(); } }
在主方法里调用
// Connection settings for the Redis instance the sink writes to.
FlinkJedisPoolConfig config = new FlinkJedisPoolConfig.Builder()
        .setHost("localhost")
        .setPort(6379)
        .build();
// Explicit type argument so the sink's element type matches MyRedisMapper.
dataStream.addSink(new RedisSink<SensorReading>(config, new MyRedisMapper()));
跳转顶部
Elasticsearch
引用依赖
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-elasticsearch6_2.12</artifactId>
    <version>1.10.1</version>
</dependency>
自定义类
/**
 * Turns each {@code SensorReading} into an Elasticsearch index request against
 * index "sensor" with type "readingData".
 */
public static class MyEsSinkFunction implements ElasticsearchSinkFunction<SensorReading> {

    /**
     * Builds one index request for the given element and hands it to the indexer.
     *
     * @param element the reading to index
     * @param ctx the runtime context (unused)
     * @param indexer receives the index request
     */
    @Override
    public void process(SensorReading element, RuntimeContext ctx, RequestIndexer indexer) {
        // Document body: every field is stored as a string.
        HashMap<String, String> dataSource = new HashMap<>();
        dataSource.put("id", element.getId());
        dataSource.put("ts", element.getTimestamp().toString());
        dataSource.put("temp", element.getTemperature().toString());

        IndexRequest indexRequest = Requests.indexRequest()
                .index("sensor")
                .type("readingData")
                .source(dataSource);

        indexer.add(indexRequest);
    }
}
在主方法里调用
ArrayListhttpHosts = new ArrayList<>(); httpHosts.add(new HttpHost("localhost", 9200)); dataStream.addSink( new ElasticsearchSink.Builder (httpHosts, new MyEsSinkFunction()).build());
跳转顶部
MySQL
依赖
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.44</version>
</dependency>
具体代码
package sink;
import beans.SenSorReading;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
/**
 * Flink job that reads sensor lines from a text file, parses them into
 * {@code SenSorReading} objects, and upserts each reading into a MySQL table
 * through a custom JDBC sink.
 */
public class MyJDBCSink {

    /**
     * Builds and runs the file-to-MySQL pipeline.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Typed source: without the String type argument the lambda below receives
        // Object and line.split(...) does not compile.
        DataStreamSource<String> inputStream = env.readTextFile("src/main/resources/sensor.txt");

        // Each line is expected to look like "id,timestamp,temperature".
        SingleOutputStreamOperator<SenSorReading> dataStream = inputStream.map(line -> {
            String[] fields = line.split(",");
            return new SenSorReading(fields[0], Long.valueOf(fields[1]), Double.valueOf(fields[2]));
        });

        dataStream.addSink(new MyJdbcSink());
        env.execute();
    }

    /**
     * Sink that keeps one row per sensor id in table {@code sensor_temp}: it first tries
     * to update the existing row and inserts a new row only when nothing was updated.
     */
    public static class MyJdbcSink extends RichSinkFunction<SenSorReading> {
        // JDBC handles are created in open(); they are transient so they are never
        // part of the serialized function.
        transient Connection conn = null;
        transient PreparedStatement insertStmt = null;
        transient PreparedStatement updateStmt = null;

        /**
         * Opens the database connection and prepares the insert and update statements.
         *
         * @param parameters configuration passed in by the runtime (unused)
         * @throws Exception if the connection or the statements cannot be created
         */
        @Override
        public void open(Configuration parameters) throws Exception {
            conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/test",
                    "root", "123456");
            // Prepared statements with placeholders; the values are bound per record.
            insertStmt = conn.prepareStatement("INSERT INTO sensor_temp (id, temp) VALUES (?, ?)");
            updateStmt = conn.prepareStatement("UPDATE sensor_temp SET temp = ? WHERe id = ?");
        }

        /**
         * Writes one reading: update first, insert when no row was updated.
         *
         * @param value the reading to persist
         * @param context sink context supplied by the runtime (unused)
         * @throws Exception if a statement fails
         */
        @Override
        public void invoke(SenSorReading value, Context context) throws Exception {
            // Try the update first; do not call super.invoke here.
            updateStmt.setDouble(1, value.getTemperature());
            updateStmt.setString(2, value.getId());
            int updatedRows = updateStmt.executeUpdate();

            // No existing row for this id, so insert one.
            if (updatedRows == 0) {
                insertStmt.setString(1, value.getId());
                insertStmt.setDouble(2, value.getTemperature());
                insertStmt.executeUpdate();
            }
        }

        /**
         * Releases the statements and the connection.
         *
         * <p>Each handle is null-checked because open() may have failed before creating
         * it, and the nested try/finally ensures that a failure while closing one handle
         * does not prevent the remaining handles from being closed.
         *
         * @throws Exception if closing any handle fails
         */
        @Override
        public void close() throws Exception {
            try {
                if (insertStmt != null) {
                    insertStmt.close();
                }
            } finally {
                try {
                    if (updateStmt != null) {
                        updateStmt.close();
                    }
                } finally {
                    if (conn != null) {
                        conn.close();
                    }
                }
            }
        }
    }
}
跳转顶部



