Commit dea5d78

feat: add lines skipped metric to pattern ingesters (#14997)
1 parent 0f242e7 · commit dea5d78

7 files changed: +112 −31 lines changed

pkg/pattern/drain/drain.go (+20 −2)

@@ -30,6 +30,7 @@ import (
 	"unsafe"
 
 	"github.com/hashicorp/golang-lru/v2/simplelru"
+	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/common/model"
 
 	"github.com/grafana/loki/v3/pkg/logproto"
@@ -211,12 +212,29 @@ func (d *Drain) Train(content string, ts int64) *LogCluster {
 	if !d.limiter.Allow() {
 		return nil
 	}
-	d.tokens, d.state = d.tokenizer.Tokenize(content, d.tokens, d.state)
+	var linesSkipped *prometheus.CounterVec
+	if d.metrics != nil {
+		linesSkipped = d.metrics.LinesSkipped
+	}
+	d.tokens, d.state = d.tokenizer.Tokenize(content, d.tokens, d.state, linesSkipped)
+	if d.tokens == nil && d.state == nil {
+		return nil
+	}
+
 	return d.train(d.tokens, d.state, ts)
 }
 
 func (d *Drain) train(tokens []string, state interface{}, ts int64) *LogCluster {
 	if len(tokens) < 4 {
+		if d.metrics != nil && d.metrics.LinesSkipped != nil {
+			d.metrics.LinesSkipped.WithLabelValues(TooFewTokens).Inc()
+		}
+		return nil
+	}
+	if len(tokens) > 80 {
+		if d.metrics != nil && d.metrics.LinesSkipped != nil {
+			d.metrics.LinesSkipped.WithLabelValues(TooManyTokens).Inc()
+		}
 		return nil
 	}
 	if d.metrics != nil {
@@ -255,7 +273,7 @@ func (d *Drain) train(tokens []string, state interface{}, ts int64) *LogCluster
 }
 
 func (d *Drain) TrainPattern(content string, samples []*logproto.PatternSample) *LogCluster {
-	tokens, state := d.tokenizer.Tokenize(content, d.tokens, d.state)
+	tokens, state := d.tokenizer.Tokenize(content, d.tokens, d.state, d.metrics.LinesSkipped)
 	matchCluster := d.treeSearch(d.rootNode, tokens, d.config.SimTh, true)
 	// Match no existing log cluster
 	if matchCluster == nil {
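
The counter is threaded through Tokenize as an optional *prometheus.CounterVec, and every use is guarded against nil so Drain instances built without metrics keep working. A minimal standalone sketch (not part of the commit; the metric name and reason value are copied from the diffs below) of that guard, checked with prometheus/testutil:

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

// recordSkip mirrors the guard in Drain.train: increment only when a
// counter was actually wired in, so nil metrics stay safe.
func recordSkip(linesSkipped *prometheus.CounterVec, reason string) {
	if linesSkipped != nil {
		linesSkipped.WithLabelValues(reason).Inc()
	}
}

func main() {
	linesSkipped := prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "patterns_dropped_total",
		Help: "The total number of log lines skipped for pattern recognition.",
	}, []string{"reason"})

	recordSkip(nil, "too_few_tokens")          // no-op: metrics disabled
	recordSkip(linesSkipped, "too_few_tokens") // counted

	fmt.Println(testutil.ToFloat64(linesSkipped.WithLabelValues("too_few_tokens"))) // 1
}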

pkg/pattern/drain/drain_test.go (+37 −18)

(Large diff not rendered by default.)

pkg/pattern/drain/line_tokenizer.go (+33 −6)

@@ -8,12 +8,13 @@ import (
 
 	"github.com/buger/jsonparser"
 	gologfmt "github.com/go-logfmt/logfmt"
+	"github.com/prometheus/client_golang/prometheus"
 
 	"github.com/grafana/loki/v3/pkg/logql/log/logfmt"
 )
 
 type LineTokenizer interface {
-	Tokenize(line string, tokens []string, state interface{}) ([]string, interface{})
+	Tokenize(line string, tokens []string, state interface{}, linesDropped *prometheus.CounterVec) ([]string, interface{})
 	Join(tokens []string, state interface{}) string
 	Clone(tokens []string, state interface{}) ([]string, interface{})
 }
@@ -56,8 +57,16 @@ func newPunctuationTokenizer(maxLineLength int) *punctuationTokenizer {
 	}
 }
 
-func (p *punctuationTokenizer) Tokenize(line string, tokens []string, state interface{}) ([]string, interface{}) {
+func (p *punctuationTokenizer) Tokenize(
+	line string,
+	tokens []string,
+	state interface{},
+	linesDropped *prometheus.CounterVec,
+) ([]string, interface{}) {
 	if len(line) > p.maxLineLength {
+		if linesDropped != nil {
+			linesDropped.WithLabelValues(LineTooLong).Inc()
+		}
 		return nil, nil
 	}
 
@@ -131,7 +140,12 @@ func (p *punctuationTokenizer) Clone(tokens []string, state interface{}) ([]stri
 
 type splittingTokenizer struct{}
 
-func (splittingTokenizer) Tokenize(line string, tokens []string, state interface{}) ([]string, interface{}) {
+func (splittingTokenizer) Tokenize(
+	line string,
+	tokens []string,
+	state interface{},
+	_ *prometheus.CounterVec,
+) ([]string, interface{}) {
 	numEquals := strings.Count(line, "=")
 	numColons := strings.Count(line, ":")
 	numSpaces := strings.Count(line, " ")
@@ -209,8 +223,16 @@ func newLogfmtTokenizer(varReplace string, maxLineLength int) *logfmtTokenizer {
 	}
 }
 
-func (t *logfmtTokenizer) Tokenize(line string, tokens []string, _ interface{}) ([]string, interface{}) {
+func (t *logfmtTokenizer) Tokenize(
+	line string,
+	tokens []string,
+	_ interface{},
+	linesDropped *prometheus.CounterVec,
+) ([]string, interface{}) {
 	if len(line) > t.maxLineLength {
+		if linesDropped != nil {
+			linesDropped.WithLabelValues(LineTooLong).Inc()
+		}
 		return nil, nil
 	}
 
@@ -277,7 +299,12 @@ func newJSONTokenizer(varReplace string, maxLineLength int, fieldsToTokenize []s
 	}
 }
 
-func (t *jsonTokenizer) Tokenize(line string, tokens []string, state interface{}) ([]string, interface{}) {
+func (t *jsonTokenizer) Tokenize(
+	line string,
+	tokens []string,
+	state interface{},
+	linesDropped *prometheus.CounterVec,
+) ([]string, interface{}) {
 	var found []byte
 	for _, key := range t.fieldsToTokenize {
 		msg, ty, _, err := jsonparser.Get(unsafeBytes(line), key)
@@ -297,7 +324,7 @@ func (t *jsonTokenizer) Tokenize(line string, tokens []string, state interface{}
 		return nil, nil
 	}
 
-	return t.punctuationTokenizer.Tokenize(foundLine, tokens, state)
+	return t.punctuationTokenizer.Tokenize(foundLine, tokens, state, linesDropped)
 }
 
 func (t *jsonTokenizer) Join(tokens []string, state interface{}) string {
pkg/pattern/drain/line_tokenizer_test.go (+5 −5)

@@ -143,7 +143,7 @@ func TestTokenizer_Tokenize(t *testing.T) {
 	for _, tt := range tests {
 		for _, tc := range testCases {
 			t.Run(tt.name+":"+tc.name, func(t *testing.T) {
-				got, _ := tt.tokenizer.Tokenize(tc.line, nil, nil)
+				got, _ := tt.tokenizer.Tokenize(tc.line, nil, nil, nil)
 				require.Equal(t, tc.want[tt.name], got)
 			})
 		}
@@ -168,7 +168,7 @@ func TestTokenizer_TokenizeAndJoin(t *testing.T) {
 	for _, tt := range tests {
 		for _, tc := range testCases {
 			t.Run(tt.name+":"+tc.name, func(t *testing.T) {
-				got := tt.tokenizer.Join(tt.tokenizer.Tokenize(tc.line, nil, nil))
+				got := tt.tokenizer.Join(tt.tokenizer.Tokenize(tc.line, nil, nil, nil))
 				require.Equal(t, tc.line, got)
 			})
 		}
@@ -184,7 +184,7 @@ func BenchmarkSplittingTokenizer(b *testing.B) {
 			b.ResetTimer()
 			b.ReportAllocs()
 			for i := 0; i < b.N; i++ {
-				tokenizer.Tokenize(tc.line, nil, nil)
+				tokenizer.Tokenize(tc.line, nil, nil, nil)
 			}
 		})
 	}
@@ -231,7 +231,7 @@ func TestLogFmtTokenizer(t *testing.T) {
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			got, _ := tokenizer.Tokenize(tt.line, nil, nil)
+			got, _ := tokenizer.Tokenize(tt.line, nil, nil, nil)
 			require.Equal(t, tt.want, got)
 		})
 	}
@@ -330,7 +330,7 @@ func TestJsonTokenizer(t *testing.T) {
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			got, state := tokenizer.Tokenize(tt.line, nil, nil)
+			got, state := tokenizer.Tokenize(tt.line, nil, nil, nil)
 			require.Equal(t, tt.want, got)
 			if len(got) == len(tt.want) && len(tt.want) != 0 {
 				pattern := tokenizer.Join(got, state)

pkg/pattern/drain/metrics.go (+4 −0)

@@ -10,6 +10,9 @@ const (
 	FormatLogfmt  = "logfmt"
 	FormatJSON    = "json"
 	FormatUnknown = "unknown"
+	TooFewTokens  = "too_few_tokens"
+	TooManyTokens = "too_many_tokens"
+	LineTooLong   = "line_too_long"
 )
 
 var logfmtRegex = regexp.MustCompile("^(\\w+?=([^\"]\\S*?|\".+?\") )*?(\\w+?=([^\"]\\S*?|\".+?\"))+$")
@@ -31,6 +34,7 @@ type Metrics struct {
 	PatternsEvictedTotal  prometheus.Counter
 	PatternsPrunedTotal   prometheus.Counter
 	PatternsDetectedTotal prometheus.Counter
+	LinesSkipped          *prometheus.CounterVec
 	TokensPerLine         prometheus.Observer
 	StatePerLine          prometheus.Observer
 }

pkg/pattern/metrics.go (+7 −0)

@@ -9,6 +9,7 @@ type ingesterMetrics struct {
 	flushQueueLength       prometheus.Gauge
 	patternsDiscardedTotal *prometheus.CounterVec
 	patternsDetectedTotal  *prometheus.CounterVec
+	linesSkipped           *prometheus.CounterVec
 	tokensPerLine          *prometheus.HistogramVec
 	statePerLine           *prometheus.HistogramVec
 	samples                *prometheus.CounterVec
@@ -34,6 +35,12 @@ func newIngesterMetrics(r prometheus.Registerer, metricsNamespace string) *inges
 			Name:      "patterns_detected_total",
 			Help:      "The total number of patterns detected from incoming log lines.",
 		}, []string{"tenant", "format"}),
+		linesSkipped: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
+			Namespace: metricsNamespace,
+			Subsystem: "pattern_ingester",
+			Name:      "patterns_dropped_total",
+			Help:      "The total number of log lines skipped for pattern recognition.",
+		}, []string{"tenant", "reason"}),
 		tokensPerLine: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{
 			Namespace: metricsNamespace,
 			Subsystem: "pattern_ingester",
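
Note the field is named linesSkipped and the help text says "skipped", yet the counter itself is registered as patterns_dropped_total. Prometheus joins Namespace, Subsystem, and Name with underscores, so assuming the usual "loki" namespace, a scrape would expose series shaped like this (tenant and sample values are purely illustrative):

# HELP loki_pattern_ingester_patterns_dropped_total The total number of log lines skipped for pattern recognition.
# TYPE loki_pattern_ingester_patterns_dropped_total counter
loki_pattern_ingester_patterns_dropped_total{reason="line_too_long",tenant="tenant-a"} 3
loki_pattern_ingester_patterns_dropped_total{reason="too_few_tokens",tenant="tenant-a"} 12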

pkg/pattern/stream.go (+6 −0)

@@ -11,6 +11,7 @@ import (
 	"github.com/grafana/loki/v3/pkg/pattern/drain"
 	"github.com/grafana/loki/v3/pkg/pattern/iter"
 
+	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/common/model"
 	"github.com/prometheus/prometheus/model/labels"
 )
@@ -37,6 +38,10 @@ func newStream(
 	drainCfg *drain.Config,
 	drainLimits drain.Limits,
 ) (*stream, error) {
+	linesSkipped, err := metrics.linesSkipped.CurryWith(prometheus.Labels{"tenant": instanceID})
+	if err != nil {
+		return nil, err
+	}
 	return &stream{
 		fp:     fp,
 		labels: labels,
@@ -47,6 +52,7 @@ func newStream(
 			PatternsEvictedTotal:  metrics.patternsDiscardedTotal.WithLabelValues(instanceID, guessedFormat, "false"),
 			PatternsPrunedTotal:   metrics.patternsDiscardedTotal.WithLabelValues(instanceID, guessedFormat, "true"),
 			PatternsDetectedTotal: metrics.patternsDetectedTotal.WithLabelValues(instanceID, guessedFormat),
+			LinesSkipped:          linesSkipped,
 			TokensPerLine:         metrics.tokensPerLine.WithLabelValues(instanceID, guessedFormat),
 			StatePerLine:          metrics.statePerLine.WithLabelValues(instanceID, guessedFormat),
 		}),
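
CurryWith pre-binds the tenant label on the two-label vector, so the drain package increments with just the remaining reason label and never needs to know which tenant it serves. A standalone sketch of that mechanic (metric and label values assumed for illustration):

package main

import "github.com/prometheus/client_golang/prometheus"

func main() {
	linesSkipped := prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "patterns_dropped_total",
		Help: "The total number of log lines skipped for pattern recognition.",
	}, []string{"tenant", "reason"})

	// Bind the tenant label once per stream, as newStream does above.
	perTenant, err := linesSkipped.CurryWith(prometheus.Labels{"tenant": "tenant-a"})
	if err != nil {
		panic(err)
	}

	// Downstream code supplies only the reason label.
	perTenant.WithLabelValues("line_too_long").Inc()
	// Equivalent to:
	// linesSkipped.WithLabelValues("tenant-a", "line_too_long").Inc()
}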
