flume hdfs一些简单配置记忆
时间:2015-05-11 02:56 来源:linux.it.net.cn 作者:IT
1 ############################################
2 # producer config
3 ############################################
4 #agent section
5 producer.sources = s
6 producer.channels = c c1 c2
7 producer.sinks = r h es
8
9 #source section
10 producer.sources.s.type =exec
11 producer.sources.s.command = tail -f /usr/local/nginx/logs/test1.log
12 #producer.sources.s.type = spooldir
13 #producer.sources.s.spoolDir = /usr/local/nginx/logs/
14 #producer.sources.s.fileHeader = true
15
16 producer.sources.s.channels = c c1 c2
17
18 producer.sources.s.interceptors = i
19 #不支持忽略大小写
20 producer.sources.s.interceptors.i.regex = .*\.(css|js|jpg|jpeg|png|gif|ico).*
21 producer.sources.s.interceptors.i.type = org.apache.flume.interceptor.RegexFilteringInterceptor$Builder
22 #不包含
23 producer.sources.s.interceptors.i.excludeEvents = true
24
25 ############################################
26 # hdfs config
27 ############################################
28 producer.channels.c.type = memory
29 #Timeout in seconds for adding or removing an event
30 producer.channels.c.keep-alive= 30
31 producer.channels.c.capacity = 10000
32 producer.channels.c.transactionCapacity = 10000
33 producer.channels.c.byteCapacityBufferPercentage = 20
34 producer.channels.c.byteCapacity = 800000
35
36 producer.sinks.r.channel = c
37
38 producer.sinks.r.type = avro
39 producer.sinks.r.hostname = 127.0.0.1
42 producer.sinks.r.port = 10101
43 ############################################
44 # hdfs config
45 ############################################
46 producer.channels.c1.type = memory
47 #Timeout in seconds for adding or removing an event
48 producer.channels.c1.keep-alive= 30
49 producer.channels.c1.capacity = 10000
50 producer.channels.c1.transactionCapacity = 10000
51 producer.channels.c1.byteCapacityBufferPercentage = 20
52 producer.channels.c1.byteCapacity = 800000
53
54 producer.sinks.h.channel = c1
55
56 producer.sinks.h.type = hdfs
57 #目录位置
58 producer.sinks.h.hdfs.path = hdfs://127.0.0.1/tmp/flume/%Y/%m/%d
59 #文件前缀
60 producer.sinks.h.hdfs.filePrefix=nginx-%Y-%m-%d-%H
61 producer.sinks.h.hdfs.fileType = DataStream
62 #时间类型必加,不然会报错
63 producer.sinks.h.hdfs.useLocalTimeStamp = true
64 producer.sinks.h.hdfs.writeFormat = Text
65 #hdfs创建多长时间新建文件,0不基于时间
66 #Number of seconds to wait before rolling current file (0 = never roll based on time interval)
67 producer.sinks.h.hdfs.rollInterval=0
68 #hdfs多大时新建文件,0不基于文件大小
69 #File size to trigger roll, in bytes (0: never roll based on file size)
70 producer.sinks.h.hdfs.rollSize = 0
71 #hdfs有多少条消息时新建文件,0不基于消息个数
72 #Number of events written to file before it rolled (0 = never roll based on number of events)
73 producer.sinks.h.hdfs.rollCount = 0
74 #批量写入hdfs的个数
75 #number of events written to file before it is flushed to HDFS
76 producer.sinks.h.hdfs.batchSize=1000
77 #flume操作hdfs的线程数(包括新建,写入等)
78 #Number of threads per HDFS sink for HDFS IO ops (open, write, etc.)
79 producer.sinks.h.hdfs.threadsPoolSize=15
80 #操作hdfs超时时间
81 #Number of milliseconds allowed for HDFS operations, such as open, write, flush, close. This number should be increased if many HDFS timeout operations are occurring.
82 producer.sinks.h.hdfs.callTimeout=30000
#以下为 Flume 官方文档中 HDFS Sink 时间戳取整相关属性的说明(属性名 / 默认值 / 含义):
#hdfs.round (default: false)
#Should the timestamp be rounded down (if true, affects all time based escape sequences except %t)
#hdfs.roundValue (default: 1)
#Rounded down to the highest multiple of this (in the unit configured using hdfs.roundUnit), less than current time.
#hdfs.roundUnit (default: second)
#The unit of the round down value - second, minute or hour.
83 ############################################
84 # elasticsearch config
85 ############################################
86 producer.channels.c2.type = memory
87 #Timeout in seconds for adding or removing an event
88 producer.channels.c2.keep-alive= 30
89 producer.channels.c2.capacity = 10000
90 producer.channels.c2.transactionCapacity = 10000
91 producer.channels.c2.byteCapacityBufferPercentage = 20
92 producer.channels.c2.byteCapacity = 800000
93
94 producer.sinks.es.channel = c2
95
96 producer.sinks.es.type = org.apache.flume.sink.elasticsearch.ElasticSearchSink
97 producer.sinks.es.hostNames = 127.0.0.1:9300
98 #Name of the ElasticSearch cluster to connect to
99 producer.sinks.es.clusterName = sunxucool
100 #Number of events to be written per txn.
101 producer.sinks.es.batchSize = 1000
102 #The name of the index which the date will be appended to. Example ‘flume’ -> ‘flume-yyyy-MM-dd’
103 producer.sinks.es.indexName = flume_es
104 #The type to index the document to, defaults to ‘log’
105 producer.sinks.es.indexType = test
106 producer.sinks.es.serializer = org.apache.flume.sink.elasticsearch.ElasticSearchLogStashEventSerializer
(责任编辑:IT)
1 ############################################ 2 # producer config 3 ############################################ 4 #agent section 5 producer.sources = s 6 producer.channels = c c1 c2 7 producer.sinks = r h es 8 9 #source section 10 producer.sources.s.type =exec 11 producer.sources.s.command = tail -f /usr/local/nginx/logs/test1.log 12 #producer.sources.s.type = spooldir 13 #producer.sources.s.spoolDir = /usr/local/nginx/logs/ 14 #producer.sources.s.fileHeader = true 15 16 producer.sources.s.channels = c c1 c2 17 18 producer.sources.s.interceptors = i 19 #不支持忽略大小写 20 producer.sources.s.interceptors.i.regex = .*\.(css|js|jpg|jpeg|png|gif|ico).* 21 producer.sources.s.interceptors.i.type = org.apache.flume.interceptor.RegexFilteringInterceptor$Builder 22 #不包含 23 producer.sources.s.interceptors.i.excludeEvents = true 24 25 ############################################ 26 # hdfs config 27 ############################################ 28 producer.channels.c.type = memory 29 #Timeout in seconds for adding or removing an event 30 producer.channels.c.keep-alive= 30 31 producer.channels.c.capacity = 10000 32 producer.channels.c.transactionCapacity = 10000 33 producer.channels.c.byteCapacityBufferPercentage = 20 34 producer.channels.c.byteCapacity = 800000 35 36 producer.sinks.r.channel = c 37 38 producer.sinks.r.type = avro 39 producer.sinks.r.hostname = 127.0.0.1 42 producer.sinks.r.port = 10101 43 ############################################ 44 # hdfs config 45 ############################################ 46 producer.channels.c1.type = memory 47 #Timeout in seconds for adding or removing an event 48 producer.channels.c1.keep-alive= 30 49 producer.channels.c1.capacity = 10000 50 producer.channels.c1.transactionCapacity = 10000 51 producer.channels.c1.byteCapacityBufferPercentage = 20 52 producer.channels.c1.byteCapacity = 800000 53 54 producer.sinks.h.channel = c1 55 56 producer.sinks.h.type = hdfs 57 #目录位置 58 producer.sinks.h.hdfs.path = hdfs://127.0.0.1/tmp/flume/%Y/%m/%d 
59 #文件前缀 60 producer.sinks.h.hdfs.filePrefix=nginx-%Y-%m-%d-%H 61 producer.sinks.h.hdfs.fileType = DataStream 62 #时间类型必加,不然会报错 63 producer.sinks.h.hdfs.useLocalTimeStamp = true 64 producer.sinks.h.hdfs.writeFormat = Text 65 #hdfs创建多长时间新建文件,0不基于时间 66 #Number of seconds to wait before rolling current file (0 = never roll based on time interval) 67 producer.sinks.h.hdfs.rollInterval=0 68 #hdfs多大时新建文件,0不基于文件大小 69 #File size to trigger roll, in bytes (0: never roll based on file size) 70 producer.sinks.h.hdfs.rollSize = 0 71 #hdfs有多少条消息时新建文件,0不基于消息个数 72 #Number of events written to file before it rolled (0 = never roll based on number of events) 73 producer.sinks.h.hdfs.rollCount = 0 74 #批量写入hdfs的个数 75 #number of events written to file before it is flushed to HDFS 76 producer.sinks.h.hdfs.batchSize=1000 77 #flume操作hdfs的线程数(包括新建,写入等) 78 #Number of threads per HDFS sink for HDFS IO ops (open, write, etc.) 79 producer.sinks.h.hdfs.threadsPoolSize=15 80 #操作hdfs超时时间 81 #Number of milliseconds allowed for HDFS operations, such as open, write, flush, close. This number should be increased if many HDFS timeout operations are occurring. 82 producer.sinks.h.hdfs.callTimeout=30000
83 ############################################ 84 # elasticsearch config 85 ############################################ 86 producer.channels.c2.type = memory 87 #Timeout in seconds for adding or removing an event 88 producer.channels.c2.keep-alive= 30 89 producer.channels.c2.capacity = 10000 90 producer.channels.c2.transactionCapacity = 10000 91 producer.channels.c2.byteCapacityBufferPercentage = 20 92 producer.channels.c2.byteCapacity = 800000 93 94 producer.sinks.es.channel = c2 95 96 producer.sinks.es.type = org.apache.flume.sink.elasticsearch.ElasticSearchSink 97 producer.sinks.es.hostNames = 127.0.0.1:9300 98 #Name of the ElasticSearch cluster to connect to 99 producer.sinks.es.clusterName = sunxucool 100 #Number of events to be written per txn. 101 producer.sinks.es.batchSize = 1000 102 #The name of the index which the date will be appended to. Example ‘flume’ -> ‘flume-yyyy-MM-dd’ 103 producer.sinks.es.indexName = flume_es 104 #The type to index the document to, defaults to ‘log’ 105 producer.sinks.es.indexType = test 106 producer.sinks.es.serializer = org.apache.flume.sink.elasticsearch.ElasticSearchLogStashEventSerializer
(责任编辑:IT) |