Skip to content

Commit 7033091

Browse files
authored
feat: Detect fields based on per-tenant configuration and put them into structured metadata at ingest time (#15188)
This PR introduces a new feature that allows for extraction of "fields" into structured metadata at ingest time. Fields can either be regular labels, structured metadata keys, or keys from `logfmt` or `json` formatted log lines. The fields are defined in a per-tenant configuration as `map[string][]string`, where the key is the target key of the structured metadata, and the value is the list of source field names, tried in the given order. The sources are checked in the order listed above: stream labels first, then structured metadata, then the parsed log line.

Example configuration:

```yaml
limits_config:
  discover_generic_fields:
    fields:
      trace_id:
        - "trace_id"
        - "TRACE_ID"
        - "traceID"
        - "TraceID"
      org_id:
        - "org_id"
        - "tenant_id"
        - "user_id"
```

While parsing of log lines comes with a certain penalty at ingest time (increased latency and CPU usage on distributors), the idea is to extract certain fields once to avoid parsing the log lines every single time at query time. This is mainly useful in combination with bloom filters.

**JSONpath support**

The values of the config map support JSONPath expressions, such as:

```yaml
limits_config:
  discover_generic_fields:
    fields:
      ticket_id:
        - "message.ticket.id"
```

Where the log line looks like this:

```json
{"timestamp": 1733128051000, "message": {"ticket": {"id": "2024-d95f87018cdb1f10"}}}
```

---

Signed-off-by: Christian Haudum <christian.haudum@gmail.com>
1 parent d8dc10f commit 7033091

File tree

9 files changed

+327
-73
lines changed

9 files changed

+327
-73
lines changed

‎docs/sources/shared/configuration.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3400,6 +3400,12 @@ The `limits_config` block configures global and per-tenant limits in Loki. The v
34003400
# CLI flag: -validation.increment-duplicate-timestamps
34013401
[increment_duplicate_timestamp: <boolean> | default = false]
34023402

3403+
# Experimental: Detect fields from stream labels, structured metadata, or
3404+
# json/logfmt formatted log line and put them into structured metadata of the
3405+
# log entry.
3406+
discover_generic_fields:
3407+
[fields: <map of string to list of strings>]
3408+
34033409
# If no service_name label exists, Loki maps a single label from the configured
34043410
# list to service_name. If none of the configured labels exist in the stream,
34053411
# label is set to unknown_service. Empty list disables setting the label.

‎pkg/distributor/distributor.go

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"fmt"
77
"math"
88
"net/http"
9+
"runtime/pprof"
910
"slices"
1011
"sort"
1112
"strconv"
@@ -460,8 +461,9 @@ func (d *Distributor) Push(ctx context.Context, req *logproto.PushRequest) (*log
460461

461462
now := time.Now()
462463
validationContext := d.validator.getValidationContextForTime(now, tenantID)
463-
levelDetector := newLevelDetector(validationContext)
464-
shouldDiscoverLevels := levelDetector.shouldDiscoverLogLevels()
464+
fieldDetector := newFieldDetector(validationContext)
465+
shouldDiscoverLevels := fieldDetector.shouldDiscoverLogLevels()
466+
shouldDiscoverGenericFields := fieldDetector.shouldDiscoverGenericFields()
465467

466468
shardStreamsCfg := d.validator.Limits.ShardStreams(tenantID)
467469
maybeShardByRate := func(stream logproto.Stream, pushSize int) {
@@ -547,10 +549,22 @@ func (d *Distributor) Push(ctx context.Context, req *logproto.PushRequest) (*log
547549
}
548550
}
549551
if shouldDiscoverLevels {
550-
logLevel, ok := levelDetector.extractLogLevel(lbs, structuredMetadata, entry)
551-
if ok {
552-
entry.StructuredMetadata = append(entry.StructuredMetadata, logLevel)
553-
}
552+
pprof.Do(ctx, pprof.Labels("action", "discover_log_level"), func(_ context.Context) {
553+
logLevel, ok := fieldDetector.extractLogLevel(lbs, structuredMetadata, entry)
554+
if ok {
555+
entry.StructuredMetadata = append(entry.StructuredMetadata, logLevel)
556+
}
557+
})
558+
}
559+
if shouldDiscoverGenericFields {
560+
pprof.Do(ctx, pprof.Labels("action", "discover_generic_fields"), func(_ context.Context) {
561+
for field, hints := range fieldDetector.validationContext.discoverGenericFields {
562+
extracted, ok := fieldDetector.extractGenericField(field, hints, lbs, structuredMetadata, entry)
563+
if ok {
564+
entry.StructuredMetadata = append(entry.StructuredMetadata, extracted)
565+
}
566+
}
567+
})
554568
}
555569
stream.Entries[n] = entry
556570

‎pkg/distributor/level_detection.go renamed to ‎pkg/distributor/field_detection.go

Lines changed: 88 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ import (
1313

1414
"github.com/grafana/loki/v3/pkg/loghttp/push"
1515
"github.com/grafana/loki/v3/pkg/logproto"
16+
"github.com/grafana/loki/v3/pkg/logql/log"
17+
"github.com/grafana/loki/v3/pkg/logql/log/jsonexpr"
1618
"github.com/grafana/loki/v3/pkg/logql/log/logfmt"
1719
"github.com/grafana/loki/v3/pkg/util/constants"
1820
)
@@ -31,46 +33,43 @@ var (
3133
errorAbbrv = []byte("err")
3234
critical = []byte("critical")
3335
fatal = []byte("fatal")
36+
37+
defaultAllowedLevelFields = []string{"level", "LEVEL", "Level", "severity", "SEVERITY", "Severity", "lvl", "LVL", "Lvl"}
3438
)
3539

36-
func allowedLabelsForLevel(allowedFields []string) map[string]struct{} {
40+
func allowedLabelsForLevel(allowedFields []string) []string {
3741
if len(allowedFields) == 0 {
38-
return map[string]struct{}{
39-
"level": {}, "LEVEL": {}, "Level": {},
40-
"severity": {}, "SEVERITY": {}, "Severity": {},
41-
"lvl": {}, "LVL": {}, "Lvl": {},
42-
}
43-
}
44-
allowedFieldsMap := make(map[string]struct{}, len(allowedFields))
45-
for _, field := range allowedFields {
46-
allowedFieldsMap[field] = struct{}{}
42+
return defaultAllowedLevelFields
4743
}
48-
return allowedFieldsMap
44+
return allowedFields
4945
}
5046

51-
type LevelDetector struct {
52-
validationContext validationContext
53-
allowedLabels map[string]struct{}
47+
type FieldDetector struct {
48+
validationContext validationContext
49+
allowedLevelLabels []string
5450
}
5551

56-
func newLevelDetector(validationContext validationContext) *LevelDetector {
57-
logLevelFields := validationContext.logLevelFields
58-
return &LevelDetector{
59-
validationContext: validationContext,
60-
allowedLabels: allowedLabelsForLevel(logLevelFields),
52+
func newFieldDetector(validationContext validationContext) *FieldDetector {
53+
return &FieldDetector{
54+
validationContext: validationContext,
55+
allowedLevelLabels: allowedLabelsForLevel(validationContext.logLevelFields),
6156
}
6257
}
6358

64-
func (l *LevelDetector) shouldDiscoverLogLevels() bool {
59+
func (l *FieldDetector) shouldDiscoverLogLevels() bool {
6560
return l.validationContext.allowStructuredMetadata && l.validationContext.discoverLogLevels
6661
}
6762

68-
func (l *LevelDetector) extractLogLevel(labels labels.Labels, structuredMetadata labels.Labels, entry logproto.Entry) (logproto.LabelAdapter, bool) {
69-
levelFromLabel, hasLevelLabel := l.hasAnyLevelLabels(labels)
63+
func (l *FieldDetector) shouldDiscoverGenericFields() bool {
64+
return l.validationContext.allowStructuredMetadata && len(l.validationContext.discoverGenericFields) > 0
65+
}
66+
67+
func (l *FieldDetector) extractLogLevel(labels labels.Labels, structuredMetadata labels.Labels, entry logproto.Entry) (logproto.LabelAdapter, bool) {
68+
levelFromLabel, hasLevelLabel := labelsContainAny(labels, l.allowedLevelLabels)
7069
var logLevel string
7170
if hasLevelLabel {
7271
logLevel = levelFromLabel
73-
} else if levelFromMetadata, ok := l.hasAnyLevelLabels(structuredMetadata); ok {
72+
} else if levelFromMetadata, ok := labelsContainAny(structuredMetadata, l.allowedLevelLabels); ok {
7473
logLevel = levelFromMetadata
7574
} else {
7675
logLevel = l.detectLogLevelFromLogEntry(entry, structuredMetadata)
@@ -85,16 +84,33 @@ func (l *LevelDetector) extractLogLevel(labels labels.Labels, structuredMetadata
8584
}, true
8685
}
8786

88-
func (l *LevelDetector) hasAnyLevelLabels(labels labels.Labels) (string, bool) {
89-
for lbl := range l.allowedLabels {
90-
if labels.Has(lbl) {
91-
return labels.Get(lbl), true
87+
func (l *FieldDetector) extractGenericField(name string, hints []string, labels labels.Labels, structuredMetadata labels.Labels, entry logproto.Entry) (logproto.LabelAdapter, bool) {
88+
89+
var value string
90+
if v, ok := labelsContainAny(labels, hints); ok {
91+
value = v
92+
} else if v, ok := labelsContainAny(structuredMetadata, hints); ok {
93+
value = v
94+
} else {
95+
value = l.detectGenericFieldFromLogEntry(entry, hints)
96+
}
97+
98+
if value == "" {
99+
return logproto.LabelAdapter{}, false
100+
}
101+
return logproto.LabelAdapter{Name: name, Value: value}, true
102+
}
103+
104+
func labelsContainAny(labels labels.Labels, names []string) (string, bool) {
105+
for _, name := range names {
106+
if labels.Has(name) {
107+
return labels.Get(name), true
92108
}
93109
}
94110
return "", false
95111
}
96112

97-
func (l *LevelDetector) detectLogLevelFromLogEntry(entry logproto.Entry, structuredMetadata labels.Labels) string {
113+
func (l *FieldDetector) detectLogLevelFromLogEntry(entry logproto.Entry, structuredMetadata labels.Labels) string {
98114
// otlp logs have a severity number, using which we are defining the log levels.
99115
// Significance of severity number is explained in otel docs here https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitynumber
100116
if otlpSeverityNumberTxt := structuredMetadata.Get(push.OTLPSeverityNumber); otlpSeverityNumberTxt != "" {
@@ -123,13 +139,24 @@ func (l *LevelDetector) detectLogLevelFromLogEntry(entry logproto.Entry, structu
123139
return l.extractLogLevelFromLogLine(entry.Line)
124140
}
125141

126-
func (l *LevelDetector) extractLogLevelFromLogLine(log string) string {
127-
logSlice := unsafe.Slice(unsafe.StringData(log), len(log))
142+
func (l *FieldDetector) detectGenericFieldFromLogEntry(entry logproto.Entry, hints []string) string {
143+
lineBytes := unsafe.Slice(unsafe.StringData(entry.Line), len(entry.Line))
144+
var v []byte
145+
if isJSON(entry.Line) {
146+
v = getValueUsingJSONParser(lineBytes, hints)
147+
} else if isLogFmt(lineBytes) {
148+
v = getValueUsingLogfmtParser(lineBytes, hints)
149+
}
150+
return string(v)
151+
}
152+
153+
func (l *FieldDetector) extractLogLevelFromLogLine(log string) string {
154+
lineBytes := unsafe.Slice(unsafe.StringData(log), len(log))
128155
var v []byte
129156
if isJSON(log) {
130-
v = l.getValueUsingJSONParser(logSlice)
131-
} else if isLogFmt(logSlice) {
132-
v = l.getValueUsingLogfmtParser(logSlice)
157+
v = getValueUsingJSONParser(lineBytes, l.allowedLevelLabels)
158+
} else if isLogFmt(lineBytes) {
159+
v = getValueUsingLogfmtParser(lineBytes, l.allowedLevelLabels)
133160
} else {
134161
return detectLevelFromLogLine(log)
135162
}
@@ -154,24 +181,42 @@ func (l *LevelDetector) extractLogLevelFromLogLine(log string) string {
154181
}
155182
}
156183

157-
func (l *LevelDetector) getValueUsingLogfmtParser(line []byte) []byte {
184+
func getValueUsingLogfmtParser(line []byte, hints []string) []byte {
158185
d := logfmt.NewDecoder(line)
186+
// In order to have the same behaviour as the JSON field extraction,
187+
// the full line needs to be parsed to extract all possible matching fields.
188+
pos := len(hints) // the index of the hint that matches
189+
var res []byte
159190
for !d.EOL() && d.ScanKeyval() {
160-
if _, ok := l.allowedLabels[string(d.Key())]; ok {
161-
return d.Value()
191+
k := unsafe.String(unsafe.SliceData(d.Key()), len(d.Key()))
192+
for x, hint := range hints {
193+
if strings.EqualFold(k, hint) && x < pos {
194+
res, pos = d.Value(), x
195+
// If there is only a single hint, or the matching hint is the first one,
196+
// we can stop parsing the rest of the line and return early.
197+
if x == 0 {
198+
return res
199+
}
200+
}
162201
}
163202
}
164-
return nil
203+
return res
165204
}
166205

167-
func (l *LevelDetector) getValueUsingJSONParser(log []byte) []byte {
168-
for allowedLabel := range l.allowedLabels {
169-
l, _, _, err := jsonparser.Get(log, allowedLabel)
170-
if err == nil {
171-
return l
206+
func getValueUsingJSONParser(line []byte, hints []string) []byte {
207+
var res []byte
208+
for _, allowedLabel := range hints {
209+
parsed, err := jsonexpr.Parse(allowedLabel, false)
210+
if err != nil {
211+
continue
212+
}
213+
l, _, _, err := jsonparser.Get(line, log.JSONPathToStrings(parsed)...)
214+
if err != nil {
215+
continue
172216
}
217+
return l
173218
}
174-
return nil
219+
return res
175220
}
176221

177222
func isLogFmt(line []byte) bool {

0 commit comments

Comments
 (0)