Skip to content

fix: skip streams over limits in dry-run mode #17114

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 36 additions & 22 deletions pkg/distributor/distributor.go
Original file line number Diff line number Diff line change
Expand Up @@ -721,6 +721,7 @@ func (d *Distributor) PushWithResolver(ctx context.Context, req *logproto.PushRe
return &logproto.PushResponse{}, validationErr
}

var skipMetadataHashes map[uint64]struct{}
if d.cfg.IngestLimitsEnabled {
streamsAfterLimits, reasonsForHashes, err := d.ingestLimits.enforceLimits(ctx, tenantID, streams)
if err != nil {
Expand All @@ -738,6 +739,18 @@ func (d *Distributor) PushWithResolver(ctx context.Context, req *logproto.PushRe
streams = streamsAfterLimits
}
}

if len(reasonsForHashes) > 0 && d.cfg.IngestLimitsDryRunEnabled {
// When IngestLimitsDryRunEnabled is true, we need to stop stream hashes
// that exceed the stream limit from being written to the metadata topic.
// If we don't do this, the stream hashes that should have been rejected
// will instead be counted as known streams, causing a disagreement
// in metrics between the limits service and ingesters.
skipMetadataHashes = make(map[uint64]struct{})
for streamHash := range reasonsForHashes {
skipMetadataHashes[streamHash] = struct{}{}
}
}
}

if !d.ingestionRateLimiter.AllowN(now, tenantID, validationContext.validationMetrics.aggregatedPushStats.lineSize) {
Expand Down Expand Up @@ -778,7 +791,7 @@ func (d *Distributor) PushWithResolver(ctx context.Context, req *logproto.PushRe
return nil, err
}
// We don't need to create a new context like the ingester writes, because we don't return unless all writes have succeeded.
d.sendStreamsToKafka(ctx, streams, tenantID, &tracker, subring)
d.sendStreamsToKafka(ctx, streams, skipMetadataHashes, tenantID, &tracker, subring)
}

if d.cfg.IngesterEnabled {
Expand Down Expand Up @@ -1213,10 +1226,10 @@ func (d *Distributor) sendStreamsErr(ctx context.Context, ingester ring.Instance
return err
}

func (d *Distributor) sendStreamsToKafka(ctx context.Context, streams []KeyedStream, tenant string, tracker *pushTracker, subring *ring.PartitionRing) {
func (d *Distributor) sendStreamsToKafka(ctx context.Context, streams []KeyedStream, skipMetadataHashes map[uint64]struct{}, tenant string, tracker *pushTracker, subring *ring.PartitionRing) {
for _, s := range streams {
go func(s KeyedStream) {
err := d.sendStreamToKafka(ctx, s, tenant, subring)
err := d.sendStreamToKafka(ctx, s, skipMetadataHashes, tenant, subring)
if err != nil {
err = fmt.Errorf("failed to write stream to kafka: %w", err)
}
Expand All @@ -1225,7 +1238,7 @@ func (d *Distributor) sendStreamsToKafka(ctx context.Context, streams []KeyedStr
}
}

func (d *Distributor) sendStreamToKafka(ctx context.Context, stream KeyedStream, tenant string, subring *ring.PartitionRing) error {
func (d *Distributor) sendStreamToKafka(ctx context.Context, stream KeyedStream, skipMetadataHashes map[uint64]struct{}, tenant string, subring *ring.PartitionRing) error {
if len(stream.Stream.Entries) == 0 {
return nil
}
Expand Down Expand Up @@ -1255,26 +1268,27 @@ func (d *Distributor) sendStreamToKafka(ctx context.Context, stream KeyedStream,

entriesSize, structuredMetadataSize := calculateStreamSizes(stream.Stream)

// However, unlike stream records, the distributor writes stream metadata
// records to one of a fixed number of partitions, the size of which is
// determined ahead of time. It does not use a ring. The reason for this
// is that we want to be able to scale components that consume metadata
// records independent of ingesters.
metadataPartitionID := int32(stream.HashKeyNoShard % uint64(d.numMetadataPartitions))
metadata, err := kafka.EncodeStreamMetadata(
metadataPartitionID,
d.cfg.KafkaConfig.Topic,
tenant,
stream.HashKeyNoShard,
entriesSize,
structuredMetadataSize,
)
if err != nil {
return fmt.Errorf("failed to marshal metadata: %w", err)
if _, ok := skipMetadataHashes[stream.HashKeyNoShard]; !ok {
// However, unlike stream records, the distributor writes stream metadata
// records to one of a fixed number of partitions, the size of which is
// determined ahead of time. It does not use a ring. The reason for this
// is that we want to be able to scale components that consume metadata
// records independent of ingesters.
metadataPartitionID := int32(stream.HashKeyNoShard % uint64(d.numMetadataPartitions))
metadata, err := kafka.EncodeStreamMetadata(
metadataPartitionID,
d.cfg.KafkaConfig.Topic,
tenant,
stream.HashKeyNoShard,
entriesSize,
structuredMetadataSize,
)
if err != nil {
return fmt.Errorf("failed to marshal metadata: %w", err)
}
records = append(records, metadata)
}

records = append(records, metadata)

d.kafkaRecordsPerRequest.Observe(float64(len(records)))

produceResults := d.kafkaWriter.ProduceSync(ctx, records)
Expand Down
Loading