需求:
1. 将 Kafka 中的数据采集并传输到 HDFS 中。
2. 采用 LZO 压缩方式(也可以不使用压缩,具体配置参见 Flume 官网文档)。
3. 这里采集的是两个 Kafka 主题:topic_start 和 topic_event。
# Flume agent "a1": reads two Kafka topics and writes each one to its own
# HDFS directory through a dedicated file channel.
#
#   r1 (topic_start) -> c1 -> k1 -> /origin_data/gmall/log/topic_start/
#   r2 (topic_event) -> c2 -> k2 -> /origin_data/gmall/log/topic_event/
#
# NOTE: this is a Java properties file. Every property must be on its own
# line, and comments must be on their own lines (a trailing "# ..." would
# become part of the value).

a1.sources = r1 r2
a1.channels = c1 c2
a1.sinks = k1 k2

# ---------------------------------------------------------------------------
# Kafka source r1: topic_start -> channel c1
# ---------------------------------------------------------------------------
a1.sources.r1.type = org.apache.flume.source.kafka.KafkaSource
a1.sources.r1.channels = c1
a1.sources.r1.batchSize = 5000
a1.sources.r1.batchDurationMillis = 2000
a1.sources.r1.kafka.bootstrap.servers = linux101:9092,linux102:9092,linux103:9092
a1.sources.r1.kafka.topics = topic_start

# ---------------------------------------------------------------------------
# Kafka source r2: topic_event -> channel c2
# ---------------------------------------------------------------------------
a1.sources.r2.type = org.apache.flume.source.kafka.KafkaSource
a1.sources.r2.channels = c2
a1.sources.r2.batchSize = 5000
a1.sources.r2.batchDurationMillis = 2000
a1.sources.r2.kafka.bootstrap.servers = linux101:9092,linux102:9092,linux103:9092
a1.sources.r2.kafka.topics = topic_event

# ---------------------------------------------------------------------------
# Channel c1 (file channel, buffers topic_start events)
# ---------------------------------------------------------------------------
a1.channels.c1.type = file
a1.channels.c1.checkpointDir = /opt/module/flume/checkpoint/behavior1
a1.channels.c1.dataDirs = /opt/module/flume/data/behavior1/
a1.channels.c1.keep-alive = 6

# ---------------------------------------------------------------------------
# Channel c2 (file channel, buffers topic_event events)
# Each file channel needs its own checkpoint and data directories.
# ---------------------------------------------------------------------------
a1.channels.c2.type = file
a1.channels.c2.checkpointDir = /opt/module/flume/checkpoint/behavior2
a1.channels.c2.dataDirs = /opt/module/flume/data/behavior2/
a1.channels.c2.keep-alive = 6

# ---------------------------------------------------------------------------
# HDFS sink k1: drains c1 (topic_start)
# The path must match the topic this sink actually receives; it previously
# pointed at the topic_event directory, which mixed both topics together.
# ---------------------------------------------------------------------------
a1.sinks.k1.type = hdfs
a1.sinks.k1.channel = c1
a1.sinks.k1.hdfs.path = /origin_data/gmall/log/topic_start/%Y-%m-%d
a1.sinks.k1.hdfs.filePrefix = logstart-

# ---------------------------------------------------------------------------
# HDFS sink k2: drains c2 (topic_event)
# ---------------------------------------------------------------------------
a1.sinks.k2.type = hdfs
a1.sinks.k2.channel = c2
a1.sinks.k2.hdfs.path = /origin_data/gmall/log/topic_event/%Y-%m-%d
a1.sinks.k2.hdfs.filePrefix = logevent-

# ---------------------------------------------------------------------------
# File rolling (small-file problem)
# Avoid producing large numbers of small files on HDFS. The 10-second
# rollInterval below is only suitable for testing; in production set
# rollInterval to 3600.
# rollSize is in bytes (134217728 = 128 MiB); rollCount = 0 disables rolling
# by event count.
# ---------------------------------------------------------------------------
a1.sinks.k1.hdfs.rollInterval = 10
a1.sinks.k1.hdfs.rollSize = 134217728
a1.sinks.k1.hdfs.rollCount = 0

a1.sinks.k2.hdfs.rollInterval = 10
a1.sinks.k2.hdfs.rollSize = 134217728
a1.sinks.k2.hdfs.rollCount = 0

# ---------------------------------------------------------------------------
# LZO compression
# NOTE(review): the lzop codec presumably requires the hadoop-lzo library to
# be installed and registered with Hadoop on this host - confirm before
# deploying.
# ---------------------------------------------------------------------------
a1.sinks.k1.hdfs.fileType = CompressedStream
a1.sinks.k2.hdfs.fileType = CompressedStream

a1.sinks.k1.hdfs.codeC = lzop
a1.sinks.k2.hdfs.codeC = lzop



