目录
Apache Hadoop生态-目录汇总-持续更新
系统环境:centos7
Java环境:Java8
1: 文件->flume->kafka
tailDir source -> kafka channel
配置文件:file_flume_kafka.conf
# 1: Agent components (no sink: the Kafka channel itself is the terminal component)
file_flume_kafka.sources = r1
file_flume_kafka.channels = c1
# 2: Source - TAILDIR tails the matched files and persists read offsets in positionFile
file_flume_kafka.sources.r1.type = TAILDIR
file_flume_kafka.sources.r1.positionFile = /usr/local/flume-1.9.0/project_v4/tail_dir.json
# FIX: removed "fileSuffix = .COMPLETED" - that property belongs to the
# Spooling Directory source and is silently ignored by TAILDIR.
file_flume_kafka.sources.r1.filegroups = f1
file_flume_kafka.sources.r1.filegroups.f1 = /log/app.*.log
## Source interceptor (ETL cleaning: checks whether each event's data is complete)
file_flume_kafka.sources.r1.interceptors = i1
file_flume_kafka.sources.r1.interceptors.i1.type = com.wester.flume.interceptor.ETLInterceptor$Builder
# 3: Channel - Kafka channel writes events straight to the topic
file_flume_kafka.channels.c1.type = org.apache.flume.channel.kafka.KafkaChannel
file_flume_kafka.channels.c1.kafka.bootstrap.servers = 192.168.5.103:9092,192.168.5.87:9092,192.168.5.114:9092
file_flume_kafka.channels.c1.kafka.topic = project_v4_topic_log
## Consumer group id so a restarted agent resumes from the last committed offset.
## FIX: the Kafka channel only honors consumer properties under the
## "kafka.consumer." prefix; the original "consumer.group.id" was ignored.
file_flume_kafka.channels.c1.kafka.consumer.group.id = file_flume_kafka
# Store raw event bodies (no Flume headers) so plain Kafka consumers can read them
file_flume_kafka.channels.c1.parseAsFlumeEvent = false
# 4: Sink - intentionally none (see channel above)
# 5: Wiring: source r1 feeds channel c1
file_flume_kafka.sources.r1.channels = c1
启动
# Start the agent in the background, logging to file_flume_kafka.log.
# FIX: added --conf so Flume loads flume-env.sh and log4j.properties from its
# conf directory; without it, -Dflume.root.logger=INFO,LOGFILE has no log4j
# configuration to act on. -C puts the jar containing the custom
# ETLInterceptor on the classpath; --name must match the property-file prefix.
nohup /usr/local/flume-1.9.0/bin/flume-ng agent \
--conf /usr/local/flume-1.9.0/conf \
--conf-file /shucang_v4/project_files/flume_jobs/conf/file_flume_kafka.conf \
-C /shucang_v4/project_files/flume_jobs/jar/project_v4_flume.jar \
--name file_flume_kafka -Dflume.root.logger=INFO,LOGFILE >/usr/local/flume-1.9.0/logs/file_flume_kafka.log 2>&1 &