
Commit a8fc494

Merge pull request #1 from DTStack/master: update20170904
2 parents: 8080aa5 + 39660bf

File tree: 107 files changed, +3697 -1747 lines


.gitignore

Lines changed: 2 additions & 0 deletions
@@ -5,3 +5,5 @@ target/*
 .classpath
 .metadata/
 .idea/
+plugin/
+*.iml

README.md

Lines changed: 68 additions & 4 deletions
@@ -1,8 +1,72 @@

The four paragraphs of the old README are carried over into the new "Overview" section below; everything else is newly added. The rewritten README, translated from Chinese:

# Notes:

Some of jlogstash's early code borrowed from the hangout project; thanks to hangout's author.

# Overview:

The goal of rewriting logstash in Java is better performance; for a comparison against Ruby logstash, see https://github.com/DTStack/jlogstash-performance-testing

Performance varies with the workload: for DTStack back-end log parsing, the Java version is 5x the Ruby version on a single 4 GB, 4-core VM.

See the wiki for jlogstash's configuration options and usage. The plugin set is still small compared with the Ruby version, and more contributors are welcome.

The individual plugins live in jlogstash-input-plugin, jlogstash-output-plugin, and jlogstash-filter-plugin.

# Inputs details:

https://github.com/DTStack/jlogstash-input-plugin/blob/master/README.md

# Filters details:

https://github.com/DTStack/jlogstash-filter-plugin/blob/master/README.md

# Outputs details:

https://github.com/DTStack/jlogstash-output-plugin/blob/master/README.md
# Jar placement (compiled jars must carry a version number):

The jlogstash core jars go under jlogstash/lib/.

Plugin jars go into the filter, input, and output directories under jlogstash/plugin/, as in the layout sketched below.
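A hypothetical on-disk layout under these rules; the directory names come from this README, while the jar file names (other than the input.kafka example used later) are placeholders:

    jlogstash/
        lib/                 # core jars, versioned, e.g. jlogstash-1.0.0.jar
        plugin/
            input/           # e.g. input.kafka-1.0.0-with-dependencies.jar
            filter/
            output/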
# jlogstash startup parameters:

-f: path to the YAML config file

-l: log file path

-i: input queue size coefficient, default 200f/1024

-w: number of filter workers, default is the machine's CPU core count + 2

-o: number of output workers, default is the machine's CPU core count

-c: output queue size coefficient, default 500f/1024

-dev: development mode; reference the plugin packages directly in pom.xml

v: error level

vv: warn level

vvv: info level

vvvv: debug level

vvvvv: trace level

The example below combines these flags.
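A minimal launch sketch: the config and log paths and the worker counts are illustrative placeholders, and the verbosity switch is written the way the README lists it:

    bin/jlogstash.sh -f conf/pipeline.yml -l logs/jlogstash.log -w 6 -o 4 vvv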
# Plugin development:

1. Each plugin's jar is loaded by its own classloader whose parent is the AppClassLoader, so plugins can depend on different versions of the same third-party jar without version conflicts.

2. Plugins must not reference each other's code; any shared code has to be bundled into each plugin's own jar.

3. The required dependencies are in the Maven central repository; search for jlogstash (http://search.maven.org/ or https://oss.sonatype.org).

4. Plugin development samples: https://github.com/DTStack/jlogstash/tree/master/src/test/java/com/dtstack/jlogstash

5. Each plugin jar's file name must begin with the plugin's class name (case-insensitive), otherwise the class will not be found. For example: input.kafka-1.0.0-with-dependencies.jar or kafka-1.0.0-with-dependencies.jar. A build-and-install sketch follows this list.
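A hypothetical build-and-install sequence for an input plugin, assuming a Maven build that bundles dependencies into a single jar; JLOGSTASH_HOME is a placeholder for the install directory, and the jar name follows rule 5 above:

    mvn clean package -DskipTests
    cp target/kafka-1.0.0-with-dependencies.jar "$JLOGSTASH_HOME/plugin/input/"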
# Join us:

https://www.dtstack.com/joinus

TODO.md

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
+技术架构模型调整 (adjust the technical architecture model)

bin/jlogstash.sh

Lines changed: 1 addition & 1 deletion
@@ -22,4 +22,4 @@ JAVA_OPTS="$JAVA_OPTS -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOcc
 #Comment to speed up starting time
 #JAVA_OPTS="$JAVA_OPTS -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false"
 
-exec java $JAVA_OPTS -cp $basedir/lib/*:$basedir/plugin/filter/*:$basedir/plugin/output/*:$basedir/plugin/input/* com.dtstack.logstash.Main "$@"
+exec java $JAVA_OPTS -cp $basedir/lib/* com.dtstack.jlogstash.Main "$@"

example.yml

Lines changed: 160 additions & 33 deletions
@@ -1,43 +1,170 @@
 inputs:
+#  - PlainFile:
+#      path:
+#        - /tmp/file/^file.*\.txt$
+#      heartbeatHost: 172.16.1.52:8854
+#      userToken: kRhisbdoTQMCZU5KqQqGkQ7sDA7BM9kpldnQ5Nf2al8ER9yp
+#      offsetDbSyncIntervalMs: 60000
+#      heartbeatIntevalMs: 60000
+
   - Kafka:
       codec: json
       encoding: UTF8 # default UTF8
-      topic:
-        dt_all_test_log: 6
+      topic:
+        {"dt_all_log": 1}
+
       consumerSettings:
-        group.id: jlogstashvvvvv
-        zookeeper.connect: 127.0.0.1:2181
+        group.id: dt_all_log_group_hao
+        zookeeper.connect: 172.16.1.181:2181,172.16.1.186:2181,172.16.1.226:2181/kafka2
         auto.commit.interval.ms: "1000"
-        auto.offset.reset: smallest
-
+#  - KafkaReset:
+#      codec: json
+#      encoding: UTF8 # default UTF8
+#      minTime: "2017-08-11 20:33:33"
+#      maxTime: "2017-08-11 20:34:33"
+#      topic:
+#        {"dt_all_log": 3}
+#      consumerSettings:
+#        group.id: dt_all_log_group_hao
+#        zookeeper.connect: 172.16.1.181:2181,172.16.1.186:2181,172.16.1.226:2181/kafka2
+#        auto.commit.interval.ms: "1000"
+#  - Netty:
+#      port: 8635
+#      whiteListPath:
+#      codec: json
+#      isExtract: false
 filters:
-  - dtstack.jdtlogparser.DtLogParser:
-      redisHost: 127.0.0.1
+  - Performance:
+      interval: 1
+      path: /tmp/filter-performance-%{+YYYY.MM.dd}.txt
+      timeZone: Asia/Shanghai
+      monitorPath: {"/tmp/filter-performance-%{+YYYY.MM.dd}.txt":"8"}
+  - dtstack.jdtloguserauth.DtLogUserAuth:
+      apiServer: 172.16.1.52:8668
+      useSsl: false
+      redisHost: 172.16.1.52
       redisPort: 6379
-      debug: false
-  - dtstack.jdtlogipip.DtLogIpIp:
-  - dtstack.jdtlogsecurity.DtLogSecurity:
-  - DateISO8601:
-      match: {"timestamp":{"srcFormat":"dd/MMM/yyyy:HH:mm:ss Z","target":"timestamp","locale":"en"}}
-  - Remove:
-      fields: ["user_token","IPORHOST=~/\\./"]
+      isRedis: true
+      redisDB: 0
+      redisPassword: taukwnlwrd9
+  - dtstack.jdtlogparser.DtLogParser:
+      apiServer: 172.16.1.52:82
+      useSsl: false
+      redisHost: 172.16.1.52
+      redisPort: 6379
+      isRedis: true
+      redisDB: 0
+      redisPassword: taukwnlwrd9
+      timeWasteConfig: {"/tmp/timewaste-%{+yyyy.MM.dd}.log":"7"} # parse-latency logging; same format as Performance: the key is the path of the dedicated file, the value is how many recent logs to keep
+      timeWasteLogMaxFreq: 100 # cap on how often parse-latency logging may write IO, to avoid saturating the disk
+      parsedTimeThreshold: 2
+      parseFailedConfig: {"/tmp/parsefailed-%{+yyyy.MM.dd}.log":"7"} # parse-failure logging; same format as Performance: the key is the path of the dedicated file, the value is how many recent logs to keep
+      parseFailedLogMaxFreq: 100 # cap on how often parse-failure logging may write IO, to avoid saturating the disk
 
 outputs:
-  - File:
-      path: /Users/sishuyss/ysq_%{tenant_id}_%{+YYYY.MM.dd}.txt
-      timezone: Asia/Shanghai
-  - Performance:
-      path: /Users/sishuyss/performance.txt
-#  - Elasticsearch:
-#      hosts: ["172.16.1.185:9300","172.16.1.188:9300"]
-#      hosts: ["127.0.0.1:9300"]
-#      indexTimezone: Asia/Shanghai
-#      cluster: tes_dtstack
-#      concurrentRequests: 2
-#      index: 'dtlog-%{tenant_id}-%{+YYYY.MM.dd}'
-#      documentType: logs # default logs
-#      bulkActions: 40000 # default 20000
-#      bulkSize: 30 # default 15 MB
-#      flushInterval: 3 # default 10 seconds
-#      timezone: "Asia/Shanghai" # default UTC; only used when formatting the index name string
-#      sniff: false # default true
+  - Performance:
+      interval: 1
+      path: /tmp/beat-performance-%{+YYYY.MM.dd}.txt
+      timeZone: Asia/Shanghai
+      monitorPath: {"/tmp/beat-performance-%{+YYYY.MM.dd}.txt":"8"}
+  - Odps:
+      configs:
+        redis.address: redis://:taukwnlwrd9@172.16.1.52:6379/1
+        redis.max.idle: 100
+        redis.max.total: 1024
+        redis.max.wait.mills: 3000
+        redis.timeout: 2000
+        redis.map.info.key: od-ps-cfg
+        redis.queue.info.key: od-ps-cfg-msg
+        http.map.info.api: http://172.16.1.52:81/api/v1/odps/provide_task_list/
+        task.thread.pool.size: 5000
+        #task.thread.cycle.commit.time: 30000
+        task.thread.commit.interval: 30000
+        task.tunnel.timezone: Asia/Shanghai
+        task.tunnel.retry.limit: 720 # number of retries when a log commit fails
+        task.tunnel.retry.interval: 5 # interval between log commits, in seconds
+        task.partitions.lru.size: 30000
+        task.report.status.address: 172.16.1.52:81
+        task.report.status.interval: 300000
+        scala.kafka.producer.brokerlist: 172.16.1.145:9092
+        scala.kafka.zookeeper: 172.16.1.181:2181,172.16.1.186:2181,172.16.1.226:2181/kafka
+        task.retry.kafka.groupid: odps_retry_event_group_test
+        task.retry.kafka.topic: odps_retry_event_topic_test
+
+#  - Elasticsearch5:
+#      hosts: ["172.16.1.145:9300"]
+#      indexTimezone: "UTC"
+#      cluster: poc_dtstack
+#      concurrentRequests: 1
+#      index: 'dtlog-%{tenant_id}-%{appname}-%{keeptype}-%{+YYYY.MM.dd}'
+#      errorEventLogConfig: {"/tmp/error-event-%{+YYYY.MM.dd}.txt":"3"}
+#      ERROR_PROTECT_KEYS: "@timestamp,appname,keeptype,logtype,tag,message,timestamp,local_ip,tenant_id,hostname,path,agent_type,offset,uuid,bajie_test"
+#      documentType: logs # default logs
+#      consistency: true # default false
+
+
+
+# inputs:
+# #  - Beats:
+# #      codec: json
+# #      port: 8635
+#   - Kafka:
+#       codec: json
+#       encoding: UTF8 # default UTF8
+#       topic:
+#         dt_all_log: 5
+#       consumerSettings:
+#         group.id: dt_all_log_group
+#         zookeeper.connect: 172.16.1.181:2181,172.16.1.186:2181,172.16.1.226:2181/kafka
+#         auto.commit.interval.ms: "1000"
+# filters:
+# #  - Performance:
+# #      path: /home/admin/jlogserver/logs/beat-filters-performance-%{+YYYY.MM.dd}.txt
+# #      timeZone: Asia/Shanghai
+# #      monitorPath: {"/home/admin/jlogserver/logs/beat-filters-performance-%{+YYYY.MM.dd}.txt":"8"}
+#   - dtstack.jdtloguserauth.DtLogUserAuth:
+#       apiServer: 172.16.1.52
+#       useSsl: false
+#       redisHost: 172.16.1.52
+#       redisPort: 6379
+#       isRedis: true
+#       redisDB: 1
+#       redisPassword: taukwnlwrd9
+#   - Add:
+#       fields: {"agent_type":"@metadata.beat","hostname":"beat.hostname","host":"beat.name"}
+#   - Remove:
+#       fields: ["@metadata","count","offset","beat"]
+#   - Rename:
+#       fields: {"source":"path"}
+#   - dtstack.jdtlogparser.DtLogParser:
+#       apiServer: 172.16.1.52:81
+#       useSsl: false
+#       redisHost: 172.16.1.52
+#       redisPort: 6379
+#       isRedis: true
+#       redisDB: 0
+#       redisPassword: taukwnlwrd9
+#   - dtstack.jdtlogcreatemessage.DtLogCreateMessage:
+#       repeatFields: ["path"]
+#   - Flow:
+#       configs:
+#         flow.control.counttype: unset
+#         flow.control.threshold: 10KB
+#         flow.stat.counttype: unset
+#         flow.stat.report.commit.delay.second: 3
+#         flow.stat.report.interval: 1000
+#         flow.stat.report.addr.template: "http://172.16.10.123:8854/api/logagent/test?uuid=%s&time=%s&bandwidth=%s"
+# outputs:
+# #  - Performance:
+# #      path: /home/admin/jlogserver/logs/beat-performance-%{+YYYY.MM.dd}.txt
+# #      timeZone: Asia/Shanghai
+# #      monitorPath: {"/tmp/output-performance-%{+YYYY.MM.dd}.txt":"8"}
+#   - Netty:
+#       host: 172.16.1.58
+#       port: 8635
+#       openCompression: true
+#       compressionLevel: 6
+#       openCollectIp: true
+# #      format: ${HOSTNAME} ${appname} [${user_token} type=${logtype} tag=${logtag}] ${path} jlogstash/$${timestamp}/$${message}
+
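A hypothetical way to run this example end to end, using the rewritten launcher above and the startup flags from the README; the log path is a placeholder:

    bin/jlogstash.sh -f example.yml -l logs/example.log vvv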

jlogstash.iml

Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
+  <component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_7" inherit-compiler-output="false">
+    <output url="file://$MODULE_DIR$/target/classes" />
+    <output-test url="file://$MODULE_DIR$/target/test-classes" />
+    <content url="file://$MODULE_DIR$">
+      <sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
+      <sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
+      <excludeFolder url="file://$MODULE_DIR$/target" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+    <orderEntry type="library" name="Maven: log4j:log4j:1.2.17" level="project" />
+    <orderEntry type="library" name="Maven: joda-time:joda-time:2.8.2" level="project" />
+    <orderEntry type="library" name="Maven: org.freemarker:freemarker-gae:2.3.23" level="project" />
+    <orderEntry type="library" name="Maven: org.yaml:snakeyaml:1.16" level="project" />
+    <orderEntry type="library" name="Maven: org.apache.commons:commons-lang3:3.4" level="project" />
+    <orderEntry type="library" name="Maven: commons-cli:commons-cli:1.2" level="project" />
+    <orderEntry type="library" name="Maven: commons-collections:commons-collections:3.2.2" level="project" />
+    <orderEntry type="library" name="Maven: com.google.guava:guava:19.0" level="project" />
+    <orderEntry type="library" name="Maven: org.apache.httpcomponents:httpclient:4.5.2" level="project" />
+    <orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore:4.4.4" level="project" />
+    <orderEntry type="library" name="Maven: commons-logging:commons-logging:1.2" level="project" />
+    <orderEntry type="library" name="Maven: commons-codec:commons-codec:1.9" level="project" />
+    <orderEntry type="library" name="Maven: redis.clients:jedis:2.8.1" level="project" />
+    <orderEntry type="library" name="Maven: org.apache.commons:commons-pool2:2.4.2" level="project" />
+    <orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-mapper-asl:1.9.13" level="project" />
+    <orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-core-asl:1.9.13" level="project" />
+    <orderEntry type="library" name="Maven: ch.qos.logback:logback-classic:1.1.7" level="project" />
+    <orderEntry type="library" name="Maven: ch.qos.logback:logback-core:1.1.7" level="project" />
+    <orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.20" level="project" />
+    <orderEntry type="library" name="Maven: io.thekraken:grok:0.1.4" level="project" />
+    <orderEntry type="library" name="Maven: com.github.tony19:named-regexp:0.2.3" level="project" />
+    <orderEntry type="library" name="Maven: com.google.code.gson:gson:2.2.2" level="project" />
+  </component>
+  <component name="sonarModuleSettings">
+    <option name="localAnalysisScripName" value="&lt;PROJECT&gt;" />
+    <option name="serverName" value="&lt;PROJECT&gt;" />
+  </component>
+</module>
