Commit 5c8e832

feat: Introduce policy stream mapping (#15982)
**What this PR does / why we need it**: Introduces the idea of policies to Loki, which are recognized based on the given stream selectors. This is an improved version of #15561 and is built on top of #15875.

A policy mapping can be configured the following way:

```yaml
12345:
  policy_stream_mapping:
    policy6:
      - selector: `{env="prod"}`
        priority: 2
      - selector: `{env=~"prod|staging"}`
        priority: 1
      - selector: `{team="finance"}`
        priority: 4
    policy7:
      - selector: `{env=~"prod|dev"}`
        priority: 3
```

With that configuration, pushes to tenant `12345` with the labels `{env="prod", team="finance"}` would be assigned to `policy6`, because the third mapping for `policy6` matches those labels and has a higher priority than any other matching entry.
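To make the resolution rule concrete, here is a minimal, self-contained Go sketch of the priority matching described above. The names `priorityStream` and `policyFor` are illustrative assumptions for this example; the actual implementation lives in Loki's `validation.PolicyStreamMapping.PolicyFor`, which this sketch only approximates:

```go
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/model/labels"
)

// priorityStream pairs a set of matchers with a priority, mirroring one
// mapping entry from the YAML above (names are illustrative, not Loki's).
type priorityStream struct {
	matchers []*labels.Matcher
	priority int
}

// policyFor returns the highest-priority policy whose matchers all match
// lbs. Ties between equal priorities are resolved arbitrarily here.
func policyFor(mapping map[string][]priorityStream, lbs labels.Labels) string {
	bestPolicy, bestPriority := "", -1
	for policy, streams := range mapping {
		for _, s := range streams {
			matched := true
			for _, m := range s.matchers {
				if !m.Matches(lbs.Get(m.Name)) {
					matched = false
					break
				}
			}
			if matched && s.priority > bestPriority {
				bestPolicy, bestPriority = policy, s.priority
			}
		}
	}
	return bestPolicy
}

func main() {
	mapping := map[string][]priorityStream{
		"policy6": {
			{matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "env", "prod")}, priority: 2},
			{matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "env", "prod|staging")}, priority: 1},
			{matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "team", "finance")}, priority: 4},
		},
		"policy7": {
			{matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "env", "prod|dev")}, priority: 3},
		},
	}
	lbs := labels.FromStrings("env", "prod", "team", "finance")
	fmt.Println(policyFor(mapping, lbs)) // policy6
}
```

With the mapping from the YAML above, this prints `policy6`: the `{team="finance"}` entry matches and its priority 4 beats policy7's matching entry at priority 3.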
1 parent 2587f34 commit 5c8e832

15 files changed (+359, -84 lines)


docs/sources/shared/configuration.md
Lines changed: 14 additions & 0 deletions

```diff
@@ -3613,6 +3613,20 @@ otlp_config:
 # CLI flag: -validation.enforced-labels
 [enforced_labels: <list of strings> | default = []]
 
+# Map of policies to stream selectors with a priority. Experimental.
+# Example:
+#  policy_stream_mapping:
+#    finance:
+#      - selectors: ["{namespace="prod", container="billing"}"]
+#        priority: 2
+#    ops:
+#      - selectors: ["{namespace="prod", container="ops"}"]
+#        priority: 1
+#    staging:
+#      - selectors: ["{namespace="staging"}, {namespace="dev"}"]
+#        priority: 1
+[policy_stream_mapping: <map of string to list of PriorityStreams>]
+
 # The number of partitions a tenant's data should be sharded to when using kafka
 # ingestion. Tenants are sharded across partitions using shuffle-sharding. 0
 # disables shuffle sharding and tenant is sharded across all partitions.
```
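One inconsistency worth flagging: the docs example above uses a plural `selectors` key taking a list, while the PR description and the `PriorityStream` struct exercised in the benchmarks below use a singular `selector` string. As a usage sketch only, with the singular form assumed from the PR description and the per-tenant override layout assumed rather than taken from this diff, an override could look like:

```yaml
# Hypothetical per-tenant override; key names follow the PR description
# (singular `selector`) and are not verified against the shipped schema.
overrides:
  "12345":
    policy_stream_mapping:
      finance:
        - selector: '{namespace="prod", container="billing"}'
          priority: 2
      ops:
        - selector: '{namespace="prod", container="ops"}'
          priority: 1
```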

pkg/compactor/retention/expiration.go
Lines changed: 1 addition & 0 deletions

```diff
@@ -42,6 +42,7 @@ type Limits interface {
 	StreamRetention(userID string) []validation.StreamRetention
 	AllByUserID() map[string]*validation.Limits
 	DefaultLimits() *validation.Limits
+	PoliciesStreamMapping(userID string) validation.PolicyStreamMapping
 }
 
 func NewExpirationChecker(limits Limits) ExpirationChecker {
```

pkg/compactor/retention/expiration_test.go
Lines changed: 7 additions & 2 deletions

```diff
@@ -13,8 +13,9 @@ import (
 )
 
 type retentionLimit struct {
-	retentionPeriod time.Duration
-	streamRetention []validation.StreamRetention
+	retentionPeriod     time.Duration
+	streamRetention     []validation.StreamRetention
+	policyStreamMapping validation.PolicyStreamMapping
 }
 
 func (r retentionLimit) convertToValidationLimit() *validation.Limits {
@@ -33,6 +34,10 @@ func (f fakeLimits) RetentionPeriod(userID string) time.Duration {
 	return f.perTenant[userID].retentionPeriod
 }
 
+func (f fakeLimits) PoliciesStreamMapping(_ string) validation.PolicyStreamMapping {
+	return f.perTenant["user0"].policyStreamMapping
+}
+
 func (f fakeLimits) StreamRetention(userID string) []validation.StreamRetention {
 	return f.perTenant[userID].streamRetention
 }
```

pkg/distributor/distributor.go
Lines changed: 27 additions & 21 deletions

```diff
@@ -528,24 +528,24 @@ func (d *Distributor) Push(ctx context.Context, req *logproto.PushRequest) (*log
 		d.truncateLines(validationContext, &stream)
 
 		var lbs labels.Labels
-		var retentionHours string
-		lbs, stream.Labels, stream.Hash, retentionHours, err = d.parseStreamLabels(validationContext, stream.Labels, stream)
+		var retentionHours, policy string
+		lbs, stream.Labels, stream.Hash, retentionHours, policy, err = d.parseStreamLabels(validationContext, stream.Labels, stream)
 		if err != nil {
 			d.writeFailuresManager.Log(tenantID, err)
 			validationErrors.Add(err)
-			validation.DiscardedSamples.WithLabelValues(validation.InvalidLabels, tenantID, retentionHours).Add(float64(len(stream.Entries)))
+			validation.DiscardedSamples.WithLabelValues(validation.InvalidLabels, tenantID, retentionHours, policy).Add(float64(len(stream.Entries)))
 			discardedBytes := util.EntriesTotalSize(stream.Entries)
-			validation.DiscardedBytes.WithLabelValues(validation.InvalidLabels, tenantID, retentionHours).Add(float64(discardedBytes))
+			validation.DiscardedBytes.WithLabelValues(validation.InvalidLabels, tenantID, retentionHours, policy).Add(float64(discardedBytes))
 			continue
 		}
 
 		if missing, lbsMissing := d.missingEnforcedLabels(lbs, tenantID); missing {
 			err := fmt.Errorf(validation.MissingEnforcedLabelsErrorMsg, strings.Join(lbsMissing, ","), tenantID)
 			d.writeFailuresManager.Log(tenantID, err)
 			validationErrors.Add(err)
-			validation.DiscardedSamples.WithLabelValues(validation.MissingEnforcedLabels, tenantID, retentionHours).Add(float64(len(stream.Entries)))
+			validation.DiscardedSamples.WithLabelValues(validation.MissingEnforcedLabels, tenantID, retentionHours, policy).Add(float64(len(stream.Entries)))
 			discardedBytes := util.EntriesTotalSize(stream.Entries)
-			validation.DiscardedBytes.WithLabelValues(validation.MissingEnforcedLabels, tenantID, retentionHours).Add(float64(discardedBytes))
+			validation.DiscardedBytes.WithLabelValues(validation.MissingEnforcedLabels, tenantID, retentionHours, policy).Add(float64(discardedBytes))
 			continue
 		}
 
@@ -554,7 +554,7 @@ func (d *Distributor) Push(ctx context.Context, req *logproto.PushRequest) (*log
 		prevTs := stream.Entries[0].Timestamp
 
 		for _, entry := range stream.Entries {
-			if err := d.validator.ValidateEntry(ctx, validationContext, lbs, entry, retentionHours); err != nil {
+			if err := d.validator.ValidateEntry(ctx, validationContext, lbs, entry, retentionHours, policy); err != nil {
 				d.writeFailuresManager.Log(tenantID, err)
 				validationErrors.Add(err)
 				continue
@@ -609,7 +609,7 @@ func (d *Distributor) Push(ctx context.Context, req *logproto.PushRequest) (*log
 			}
 
 			n++
-			validationContext.validationMetrics.compute(entry, retentionHours)
+			validationContext.validationMetrics.compute(entry, retentionHours, policy)
 			pushSize += len(entry.Line)
 		}
 		stream.Entries = stream.Entries[:n]
@@ -647,10 +647,10 @@ func (d *Distributor) Push(ctx context.Context, req *logproto.PushRequest) (*log
 		return nil, httpgrpc.Errorf(retStatusCode, "%s", err.Error())
 	}
 
-	if !d.ingestionRateLimiter.AllowN(now, tenantID, validationContext.validationMetrics.lineSize) {
+	if !d.ingestionRateLimiter.AllowN(now, tenantID, validationContext.validationMetrics.aggregatedPushStats.lineSize) {
 		d.trackDiscardedData(ctx, req, validationContext, tenantID, validationContext.validationMetrics, validation.RateLimited)
 
-		err = fmt.Errorf(validation.RateLimitedErrorMsg, tenantID, int(d.ingestionRateLimiter.Limit(now, tenantID)), validationContext.validationMetrics.lineCount, validationContext.validationMetrics.lineSize)
+		err = fmt.Errorf(validation.RateLimitedErrorMsg, tenantID, int(d.ingestionRateLimiter.Limit(now, tenantID)), validationContext.validationMetrics.aggregatedPushStats.lineCount, validationContext.validationMetrics.aggregatedPushStats.lineSize)
 		d.writeFailuresManager.Log(tenantID, err)
 		// Return a 429 to indicate to the client they are being rate limited
 		return nil, httpgrpc.Errorf(http.StatusTooManyRequests, "%s", err.Error())
@@ -787,14 +787,16 @@ func (d *Distributor) trackDiscardedData(
 	validationMetrics validationMetrics,
 	reason string,
 ) {
-	for retentionHours, count := range validationMetrics.lineCountPerRetentionHours {
-		validation.DiscardedSamples.WithLabelValues(reason, tenantID, retentionHours).Add(float64(count))
-		validation.DiscardedBytes.WithLabelValues(reason, tenantID, retentionHours).Add(float64(validationMetrics.lineSizePerRetentionHours[retentionHours]))
+	for policy, retentionToStats := range validationMetrics.policyPushStats {
+		for retentionHours, stats := range retentionToStats {
+			validation.DiscardedSamples.WithLabelValues(reason, tenantID, retentionHours, policy).Add(float64(stats.lineCount))
+			validation.DiscardedBytes.WithLabelValues(reason, tenantID, retentionHours, policy).Add(float64(stats.lineSize))
+		}
 	}
 
 	if d.usageTracker != nil {
 		for _, stream := range req.Streams {
-			lbs, _, _, _, err := d.parseStreamLabels(validationContext, stream.Labels, stream)
+			lbs, _, _, _, _, err := d.parseStreamLabels(validationContext, stream.Labels, stream)
 			if err != nil {
 				continue
 			}
@@ -1173,28 +1175,32 @@ type labelData struct {
 	hash uint64
 }
 
-func (d *Distributor) parseStreamLabels(vContext validationContext, key string, stream logproto.Stream) (labels.Labels, string, uint64, string, error) {
+func (d *Distributor) parseStreamLabels(vContext validationContext, key string, stream logproto.Stream) (labels.Labels, string, uint64, string, string, error) {
+	mapping := d.validator.Limits.PoliciesStreamMapping(vContext.userID)
 	if val, ok := d.labelCache.Get(key); ok {
 		retentionHours := d.tenantsRetention.RetentionHoursFor(vContext.userID, val.ls)
-		return val.ls, val.ls.String(), val.hash, retentionHours, nil
+		policy := mapping.PolicyFor(val.ls)
+		return val.ls, val.ls.String(), val.hash, retentionHours, policy, nil
 	}
 
 	ls, err := syntax.ParseLabels(key)
 	if err != nil {
-		tenantRetentionHours := d.tenantsRetention.RetentionHoursFor(vContext.userID, nil)
-		return nil, "", 0, tenantRetentionHours, fmt.Errorf(validation.InvalidLabelsErrorMsg, key, err)
+		retentionHours := d.tenantsRetention.RetentionHoursFor(vContext.userID, nil)
+		// TODO: check for global policy.
+		return nil, "", 0, retentionHours, mapping.PolicyFor(nil), fmt.Errorf(validation.InvalidLabelsErrorMsg, key, err)
 	}
 
+	policy := mapping.PolicyFor(ls)
 	retentionHours := d.tenantsRetention.RetentionHoursFor(vContext.userID, ls)
 
-	if err := d.validator.ValidateLabels(vContext, ls, stream, retentionHours); err != nil {
-		return nil, "", 0, retentionHours, err
+	if err := d.validator.ValidateLabels(vContext, ls, stream, retentionHours, policy); err != nil {
+		return nil, "", 0, retentionHours, policy, err
 	}
 
 	lsHash := ls.Hash()
 
 	d.labelCache.Add(key, labelData{ls, lsHash})
-	return ls, ls.String(), lsHash, retentionHours, nil
+	return ls, ls.String(), lsHash, retentionHours, policy, nil
 }
 
 // shardCountFor returns the right number of shards to be used by the given stream.
```
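A side note on the metric calls above: `DiscardedSamples` and `DiscardedBytes` now receive a fourth label value, which only works if the counter vectors themselves are declared with a matching fourth label. That declaration lives in `pkg/validation` and is not part of this diff; the sketch below shows the shape it must take, with the label names assumed rather than copied from the source:

```go
package validation

import "github.com/prometheus/client_golang/prometheus"

// Sketch only: a four-label declaration matching the four-argument
// WithLabelValues calls in the diff above. The "policy" label is the
// new dimension; the other label names are assumptions.
var DiscardedSamples = prometheus.NewCounterVec(prometheus.CounterOpts{
	Namespace: "loki",
	Name:      "discarded_samples_total",
	Help:      "The total number of samples that were discarded.",
}, []string{"reason", "tenant", "retention_hours", "policy"})
```

The practical cost is one extra series per distinct policy name on each reason/tenant/retention combination.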

pkg/distributor/distributor_test.go
Lines changed: 61 additions & 2 deletions

```diff
@@ -1233,7 +1233,7 @@ func Benchmark_SortLabelsOnPush(b *testing.B) {
 	for n := 0; n < b.N; n++ {
 		stream := request.Streams[0]
 		stream.Labels = `{buzz="f", a="b"}`
-		_, _, _, _, err := d.parseStreamLabels(vCtx, stream.Labels, stream)
+		_, _, _, _, _, err := d.parseStreamLabels(vCtx, stream.Labels, stream)
 		if err != nil {
 			panic("parseStreamLabels fail,err:" + err.Error())
 		}
@@ -1279,7 +1279,7 @@ func TestParseStreamLabels(t *testing.T) {
 	vCtx := d.validator.getValidationContextForTime(testTime, "123")
 
 	t.Run(tc.name, func(t *testing.T) {
-		lbs, lbsString, hash, _, err := d.parseStreamLabels(vCtx, tc.origLabels, logproto.Stream{
+		lbs, lbsString, hash, _, _, err := d.parseStreamLabels(vCtx, tc.origLabels, logproto.Stream{
 			Labels: tc.origLabels,
 		})
 		if tc.expectedErr != nil {
@@ -2063,3 +2063,62 @@ func TestDistributor_StructuredMetadataSanitization(t *testing.T) {
 		assert.Equal(t, tc.numSanitizations, testutil.ToFloat64(distributors[0].tenantPushSanitizedStructuredMetadata.WithLabelValues("test")))
 	}
 }
+
+func BenchmarkDistributor_PushWithPolicies(b *testing.B) {
+	baselineLimits := &validation.Limits{}
+	flagext.DefaultValues(baselineLimits)
+	lbs := `{foo="bar", env="prod", daz="baz", container="loki", pod="loki-0"}`
+
+	b.Run("push without policies", func(b *testing.B) {
+		limits := baselineLimits
+		limits.PolicyStreamMapping = make(validation.PolicyStreamMapping)
+		distributors, _ := prepare(&testing.T{}, 1, 3, limits, nil)
+		req := makeWriteRequestWithLabels(10, 10, []string{lbs}, false, false, false)
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			distributors[0].Push(ctx, req) //nolint:errcheck
+		}
+	})
+
+	for numPolicies := 1; numPolicies <= 100; numPolicies *= 10 {
+		b.Run(fmt.Sprintf("push with %d policies", numPolicies), func(b *testing.B) {
+			limits := baselineLimits
+			limits.PolicyStreamMapping = make(validation.PolicyStreamMapping)
+			for i := 1; i <= numPolicies; i++ {
+				limits.PolicyStreamMapping[fmt.Sprintf("policy%d", i)] = []*validation.PriorityStream{
+					{
+						Selector: `{foo="bar"}`, Priority: i,
+					},
+				}
+			}
+
+			req := makeWriteRequestWithLabels(10, 10, []string{lbs}, false, false, false)
+			distributors, _ := prepare(&testing.T{}, 1, 3, limits, nil)
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				distributors[0].Push(ctx, req) //nolint:errcheck
+			}
+		})
+	}
+
+	for numMatchers := 1; numMatchers <= 100; numMatchers *= 10 {
+		b.Run(fmt.Sprintf("push with %d matchers", numMatchers), func(b *testing.B) {
+			limits := baselineLimits
+			limits.PolicyStreamMapping = make(validation.PolicyStreamMapping)
+			for i := 1; i <= numMatchers; i++ {
+				limits.PolicyStreamMapping["policy0"] = append(limits.PolicyStreamMapping["policy0"], &validation.PriorityStream{
+					Selector: `{foo="bar"}`,
+					Matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "foo", "bar")},
+					Priority: i,
+				})
+			}
+
+			req := makeWriteRequestWithLabels(10, 10, []string{lbs}, false, false, false)
+			distributors, _ := prepare(&testing.T{}, 1, 3, limits, nil)
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				distributors[0].Push(ctx, req) //nolint:errcheck
+			}
+		})
+	}
+}
```

pkg/distributor/validation_metrics.go
Lines changed: 27 additions & 13 deletions

```diff
@@ -5,26 +5,40 @@ import (
 	"github.com/grafana/loki/v3/pkg/util"
 )
 
+type pushStats struct {
+	lineSize  int
+	lineCount int
+}
+
 type validationMetrics struct {
-	lineSizePerRetentionHours  map[string]int
-	lineCountPerRetentionHours map[string]int
-	lineSize                   int
-	lineCount                  int
-	tenantRetentionHours       string
+	policyPushStats      map[string]map[string]pushStats // policy -> retentionHours -> stats
+	tenantRetentionHours string
+	aggregatedPushStats  pushStats
 }
 
 func newValidationMetrics(tenantRetentionHours string) validationMetrics {
 	return validationMetrics{
-		lineSizePerRetentionHours:  make(map[string]int),
-		lineCountPerRetentionHours: make(map[string]int),
-		tenantRetentionHours:       tenantRetentionHours,
+		policyPushStats:      make(map[string]map[string]pushStats),
+		tenantRetentionHours: tenantRetentionHours,
 	}
 }
 
-func (v *validationMetrics) compute(entry logproto.Entry, retentionHours string) {
+func (v *validationMetrics) compute(entry logproto.Entry, retentionHours string, policy string) {
+	if _, ok := v.policyPushStats[policy]; !ok {
+		v.policyPushStats[policy] = make(map[string]pushStats)
+	}
+
+	if _, ok := v.policyPushStats[policy][retentionHours]; !ok {
+		v.policyPushStats[policy][retentionHours] = pushStats{}
+	}
+
 	totalEntrySize := util.EntryTotalSize(&entry)
-	v.lineSizePerRetentionHours[retentionHours] += totalEntrySize
-	v.lineCountPerRetentionHours[retentionHours]++
-	v.lineSize += totalEntrySize
-	v.lineCount++
+
+	v.aggregatedPushStats.lineSize += totalEntrySize
+	v.aggregatedPushStats.lineCount++
+
+	stats := v.policyPushStats[policy][retentionHours]
+	stats.lineCount++
+	stats.lineSize += totalEntrySize
+	v.policyPushStats[policy][retentionHours] = stats
 }
```
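To see the new bookkeeping in isolation, here is a minimal, runnable sketch (standalone types, not the Loki source) of how the nested policy -> retentionHours -> stats map accumulates and is read back, mirroring `compute` above and the loop in `trackDiscardedData`:

```go
package main

import "fmt"

type pushStats struct {
	lineSize  int
	lineCount int
}

// policyPushStats mirrors the validationMetrics layout:
// policy -> retentionHours -> accumulated stats.
type policyPushStats map[string]map[string]pushStats

func (p policyPushStats) compute(policy, retentionHours string, entrySize int) {
	if _, ok := p[policy]; !ok {
		p[policy] = make(map[string]pushStats)
	}
	stats := p[policy][retentionHours] // zero value if absent
	stats.lineCount++
	stats.lineSize += entrySize
	p[policy][retentionHours] = stats // maps hold values, so write back
}

func main() {
	m := make(policyPushStats)
	m.compute("finance", "744", 128)
	m.compute("finance", "744", 64)
	m.compute("ops", "168", 32)

	for policy, byRetention := range m {
		for retention, stats := range byRetention {
			fmt.Printf("policy=%s retention=%sh lines=%d bytes=%d\n",
				policy, retention, stats.lineCount, stats.lineSize)
		}
	}
}
```

The write-back on the map assignment matters: Go map values are copies, so the updated struct must be stored again, exactly as `compute` does.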
