-
Notifications
You must be signed in to change notification settings - Fork 0
/
flume-hdfs_sink-02.conf
74 lines (65 loc) · 3.62 KB
/
flume-hdfs_sink-02.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# 各组件的命名
agent.sources = avroSource
agent.channels = memoryChannel
agent.sinks = hdfsSink
# 设置source与sink的channel
agent.sources.avroSource.channels = memoryChannel
#------------------------- taildirSource相关配置 -------------------------#
# agent.sources.avroSource.type = avro
# agent.sources.avroSource.bind = 10.83.96.13
# agent.sources.avroSource.port = 8001
# agent.sources.avroSource.channels = memoryChannel
agent.sources.avroSource.type = avro
agent.sources.avroSource.bind = 10.83.96.13
agent.sources.avroSource.port = 8002
agent.sources.avroSource.channels = memoryChannel
#------------------------- memoryChannel相关配置 -------------------------#
agent.channels.memoryChannel.type = memory
# channel中存储的最大事件数
agent.channels.memoryChannel.capacity = 10
# 添加或删除事件的超时时间(3秒)
agent.channels.memoryChannel.keep-alive = 1
# channel从source拿的最大数或发送给sink的最大事件数
agent.channels.memoryChannel.transactionCapacity = 10
# 定义byteCapacity与通道中所有事件的估计总大小之间的缓冲区百分比,以计算标头中的数据
agent.channels.memoryChannel.byteCapacityBufferPercentage = 20
# 允许的最大内存总字节数,作为此通道中所有事件的总和。
agent.channels.memoryChannel.byteCapacity = 800000
agent.sinks.hdfsSink.channel = memoryChannel
#------------------------- hdfsSink相关配置 -------------------------#
agent.sinks.hdfsSink.type = hdfs
# 在将文件刷新到HDFS之前写入文件的事件数(默认100)
agent.sinks.hdfsSink.hdfs.batchSize = 10
# hdfs路径
agent.sinks.hdfsSink.hdfs.path = hdfs://testsnow:8020/test/%{log_type}
# 写入的文件的格式(默认Writable.), 写入HDFS时需要修改为Text, 否则Hive无法读取这些文件
agent.sinks.hdfsSink.hdfs.writeFormat = Text
# 文件格式 : SequenceFile(默认)、DataStream、CompressedStream
# DataStream输出的是不可被压缩文件; CompressedStream输出的是可压缩文件, 且需要设置压缩格式
agent.sinks.hdfsSink.hdfs.fileType = DataStream
# 设置压缩格式, 可用压缩格式 : gzip, bzip2, lzo, lzop, snappy
# a.sinks.hdfsSink.hdfs.codeC =
# 配置前缀和后缀
agent.sinks.hdfsSink.hdfs.filePrefix = %{log_type}
agent.sinks.hdfsSink.hdfs.fileSuffix = .log
# 开启时间戳,在头文件中添加时间戳
agent.sinks.hdfsSink.hdfs.useLocalTimeStamp = true
# 一天滚动一个文件夹
# 开启时间戳向下舍入(除%t之外的所有基于时间的转义序列)
agent.sinks.hdfsSink.hdfs.round = true
# 时间戳舍入的范围一天
agent.sinks.hdfsSink.hdfs.roundValue = 24
# 只有second、minute、hour三种格式
agent.sinks.hdfsSink.hdfs.roundUnit = hour
# 每隔一小时(默认30s)将临时文件滚动成一个目标文件
agent.sinks.hdfsSink.hdfs.rollInterval = 3600
# 当文件大小为128M时,将临时文件滚动成一个目标文件(默认1024B)
agent.sinks.hdfsSink.hdfs.rollSize = 134217728
# 事件数据达到该数量的时候,将临时文件滚动成目标文件(从不基于事件滚动,默认10个事件)
agent.sinks.hdfsSink.hdfs.rollCount = 0
# 设置hdfs的超时访问时间(默认10000毫秒)
# agent.sinks.hdfsSink.hdfs.callTimeout = 3600000
# /home/hadoop/flume-1.9.0/bin/flume-ng agent --conf /home/hadoop/flume-1.9.0/conf/ -f /home/hadoop/flume-1.9.0/conf/flume-hdfs_sink-02.conf -n agent -Dflume.root.logger=info,console
# nohup /home/hadoop/flume-1.9.0/bin/flume-ng agent --conf /home/hadoop/flume-1.9.0/conf/ -f /home/hadoop/flume-1.9.0/conf/flume-hdfs_sink-02.conf -n agent &>> /dev/null &
# jobs -l | awk '{print $2}' | xargs kill
# ps -ef | grep -v grep | grep flume | awk '{print $2}' | xargs kill