@@ -115,6 +115,14 @@ var testCases = []TestCase{
115
115
typeSplitting : {`!@£$%^&*()` },
116
116
},
117
117
},
118
+ {
119
+ name : "line length greater than max allowed length" ,
120
+ line : `09:17:38.033366 ▶ INFO route ops sending to dest https://graphite-cortex-ops-blocks-us-east4.grafana.net/graphite/metrics: service_is_carbon-relay-ng.instance_is_carbon-relay-ng-c665b7b-j2trk.mtype_is_counter.dest_is_https_graphite-cortex-ops-blocks-us-east4_grafana_netgraphitemetrics.unit_is_Metric.action_is_drop.reason_is_queue_full 0 1717060658 userid invalid` ,
121
+ want : map [string ][]string {
122
+ typePunctuation : []string (nil ),
123
+ typeSplitting : {`09:` , `17:` , `38.033366` , `▶` , `INFO` , `` , `route` , `ops` , `sending` , `to` , `dest` , `https:` , `//graphite-cortex-ops-blocks-us-east4.grafana.net/graphite/metrics:` , `` , `service_is_carbon-relay-ng.instance_is_carbon-relay-ng-c665b7b-j2trk.mtype_is_counter.dest_is_https_graphite-cortex-ops-blocks-us-east4_grafana_netgraphitemetrics.unit_is_Metric.action_is_drop.reason_is_queue_full` , `0` , `1717060658` , `userid` , `invalid` },
124
+ },
125
+ },
118
126
}
119
127
120
128
func TestTokenizer_Tokenize (t * testing.T ) {
@@ -124,7 +132,7 @@ func TestTokenizer_Tokenize(t *testing.T) {
124
132
}{
125
133
{
126
134
name : typePunctuation ,
127
- tokenizer : newPunctuationTokenizer (),
135
+ tokenizer : newPunctuationTokenizer (360 ),
128
136
},
129
137
{
130
138
name : typeSplitting ,
@@ -149,7 +157,7 @@ func TestTokenizer_TokenizeAndJoin(t *testing.T) {
149
157
}{
150
158
{
151
159
name : typePunctuation ,
152
- tokenizer : newPunctuationTokenizer (),
160
+ tokenizer : newPunctuationTokenizer (DefaultConfig (). MaxAllowedLineLength ),
153
161
},
154
162
{
155
163
name : typeSplitting ,
@@ -168,7 +176,7 @@ func TestTokenizer_TokenizeAndJoin(t *testing.T) {
168
176
}
169
177
170
178
func BenchmarkSplittingTokenizer (b * testing.B ) {
171
- tokenizer := newPunctuationTokenizer ()
179
+ tokenizer := newPunctuationTokenizer (DefaultConfig (). MaxAllowedLineLength )
172
180
173
181
for _ , tt := range testCases {
174
182
tc := tt
@@ -213,9 +221,13 @@ func TestLogFmtTokenizer(t *testing.T) {
213
221
line : `logger=sqlstore.metrics traceID=c933fefbe893411d3be8e1648d6bcf37 t=2024-07-10T16:00:15.564896897Z level=debug msg="query finished" status=success elapsedtime=1.324305ms <REDACTED> error=null` ,
214
222
want : []string {"logger" , "sqlstore.metrics" , "traceID" , "<_>" , "t" , "<_>" , "level" , "debug" , "msg" , "query finished" , "status" , "success" , "elapsedtime" , "1.324305ms" , "<REDACTED>" , "" , "error" , "null" },
215
223
},
224
+ {
225
+ line : `ts=2024-05-30T12:50:36.648377186Z caller=scheduler_processor.go:143 level=warn msg="error contacting scheduler" err="rpc error: code = Unavailable desc = connection error: desc = \"error reading server preface: EOF\"" addr=10.0.151.101:9095 ip=127.0.0.1 userid=1234456` ,
226
+ want : []string (nil ),
227
+ },
216
228
}
217
229
218
- tokenizer := newLogfmtTokenizer (param )
230
+ tokenizer := newLogfmtTokenizer (param , 250 )
219
231
220
232
for _ , tt := range tests {
221
233
t .Run (tt .name , func (t * testing.T ) {
@@ -268,7 +280,7 @@ func TestLogFmtTokenizerJoin(t *testing.T) {
268
280
},
269
281
}
270
282
271
- tokenizer := newLogfmtTokenizer ("" )
283
+ tokenizer := newLogfmtTokenizer ("" , DefaultConfig (). MaxAllowedLineLength )
272
284
273
285
for _ , tt := range tests {
274
286
t .Run ("" , func (t * testing.T ) {
@@ -306,16 +318,23 @@ func TestJsonTokenizer(t *testing.T) {
306
318
want : []string {"successfully" , "discovered" , "15" , "agent" , "IP" , "addresses" },
307
319
pattern : "<_>successfully discovered 15 agent IP addresses<_>" ,
308
320
},
321
+ {
322
+ line : `{"msg":{"actor":{"alternateId":"foo@grafana.com","displayName":"Foo bar","id":"dq23","type":"User"},"authenticationContext":{"authenticationStep":0,"externalSessionId":"123d"},"client":{"device":"Computer","geographicalContext":{"city":"Berlin","country":"DE","state":"Land Berlin"},"ipAddress":"0.0.0.0","userAgent":{"browser":"CHROME","os":"Mac OS X","rawUserAgent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"},"zone":"null"},"debugContext":{"debugData":{"authMethodFirstEnrollment":"123","authMethodFirstType":"foo","authMethodFirstVerificationTime":"2024-07-02T11:28:03.219Z","authMethodSecondEnrollment":"var","authMethodSecondType":"ddd","authMethodSecondVerificationTime":"2024-07-03T06:59:09.151Z","authnRequestId":"1","dtHash":"1","logOnlySecurityData":"{\"risk\":{\"level\":\"LOW\"},\"behaviors\":{\"New Geo-Location\":\"NEGATIVE\",\"New Device\":\"NEGATIVE\",\"New IP\":\"NEGATIVE\",\"New State\":\"NEGATIVE\",\"New Country\":\"NEGATIVE\",\"Velocity\":\"NEGATIVE\",\"New City\":\"NEGATIVE\"}}","requestId":"1","threatSuspected":"false","url":"/foo?"}},"displayMessage":"Evaluation of sign-on policy","eventType":"policy.evaluate_sign_on","legacyEventType":"app.oauth2.token.grant.refresh_token_success","outcome":{"reason":"Sign-on policy evaluation resulted in AUTHENTICATED","result":"ALLOW"},"published":"2024-07-03T09:19:59.973Z","request":{"ipChain":[{"geographicalContext":{"city":"Berlin","country":"Germany","geolocation":{"lat":52.5363,"lon":13.4169},"postalCode":"10435","state":"Land Berlin"},"ip":"95.90.234.241","version":"V4"}]},"securityContext":{"asNumber":3209,"asOrg":"kabel deutschland breitband customer 19","domain":"kabel-deutschland.de","isProxy":false,"isp":"vodafone gmbh"},"severity":"INFO","target":[{"alternateId":"Salesforce.com","detailEntry":{"signOnModeEvaluationResult":"AUTHENTICATED","signOnModeType":"SAML_2_0"},"displayName":"Salesforce.com","id":"0oa5sfmj3hz0mTgoW357","type":"AppInstance"},{"alternateId":"unknown","detailEntry":{"policyRuleFactorMode":"2FA"},"displayName":"Catch-all Rule","id":"1","type":"Rule"}],"transaction":{"detail":{},"id":"1","type":"WEB"},"context":[{"repo":{"id":27826205,"name":"hermanwahyudi/selenium","url":"https://api.github.com/repos/hermanwahyudi/selenium"},"payload":{"push_id":536863976,"size":1,"distinct_size":0,"ref":"refs/heads/master","head":"1b58dd4c4e14ea9cf5212b981774bd448a266c3c","before":"20b10e3a605bd177efff62f1130943774ac07bf3","commits":[{"sha":"1b58dd4c4e14ea9cf5212b981774bd448a266c3c","author":{"email":"2bb20d8a71fb7adbc1d6239cc9ff4130f26819dc@gmail.com","name":"Herman"},"message":"Update README.md","distinct":false,"url":"https://api.github.com/repos/hermanwahyudi/selenium/commits/1b58dd4c4e14ea9cf5212b981774bd448a266c3c"}]}},{"repo":{"id":27826205,"name":"hermanwahyudi/selenium","url":"https://api.github.com/repos/hermanwahyudi/selenium"},"payload":{"push_id":536863976,"size":1,"distinct_size":0,"ref":"refs/heads/master","head":"1b58dd4c4e14ea9cf5212b981774bd448a266c3c","before":"20b10e3a605bd177efff62f1130943774ac07bf3","commits":[{"sha":"1b58dd4c4e14ea9cf5212b981774bd448a266c3c","author":{"email":"2bb20d8a71fb7adbc1d6239cc9ff4130f26819dc@gmail.com","name":"Herman"},"message":"Update README.md","distinct":false,"url":"https://api.github.com/repos/hermanwahyudi/selenium/commits/1b58dd4c4e14ea9cf5212b981774bd448a266c3c"}]}}],"uuid":"1","version":"0"},"level":"info","type":"received event","time":"2024-07-03T09:19:59Z"}` ,
323
+ want : []string (nil ),
324
+ pattern : "" ,
325
+ },
309
326
}
310
327
311
- tokenizer := newJSONTokenizer (param )
328
+ tokenizer := newJSONTokenizer (param , DefaultConfig (). MaxAllowedLineLength )
312
329
313
330
for _ , tt := range tests {
314
331
t .Run (tt .name , func (t * testing.T ) {
315
332
got , state := tokenizer .Tokenize (tt .line , nil , nil )
316
333
require .Equal (t , tt .want , got )
317
- pattern := tokenizer .Join (got , state )
318
- require .Equal (t , tt .pattern , pattern )
334
+ if len (got ) == len (tt .want ) && len (tt .want ) != 0 {
335
+ pattern := tokenizer .Join (got , state )
336
+ require .Equal (t , tt .pattern , pattern )
337
+ }
319
338
})
320
339
}
321
340
}
0 commit comments