Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
handle bad ingesters in partition-ctx
  • Loading branch information
benclive committed Nov 29, 2024
commit 9027d4d98b62d03f72be595a3b86a4cbc6225692
36 changes: 25 additions & 11 deletions pkg/querier/ingester_querier.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ const (

type PartitionContext struct {
isPartitioned bool
ingestersUsed []PartitionIngesterUsed
ingestersUsed map[string]PartitionIngesterUsed
mtx sync.Mutex
}

Expand All @@ -106,7 +106,16 @@ func (p *PartitionContext) AddClient(client logproto.QuerierClient, addr string)
if !p.isPartitioned {
return
}
p.ingestersUsed = append(p.ingestersUsed, PartitionIngesterUsed{client: client, addr: addr})
p.ingestersUsed[addr] = PartitionIngesterUsed{client: client, addr: addr}
}

func (p *PartitionContext) RemoveClient(addr string) {
p.mtx.Lock()
defer p.mtx.Unlock()
if !p.isPartitioned {
return
}
delete(p.ingestersUsed, addr)
}

func (p *PartitionContext) SetIsPartitioned(isPartitioned bool) {
Expand All @@ -123,11 +132,12 @@ func (p *PartitionContext) forQueriedIngesters(ctx context.Context, f func(conte
p.mtx.Lock()
defer p.mtx.Unlock()

var cancel context.CancelFunc
ctx, cancel = context.WithTimeout(ctx, 5*time.Second)
defer cancel()
ingestersUsed := make([]PartitionIngesterUsed, 0, len(p.ingestersUsed))
for _, ingester := range p.ingestersUsed {
ingestersUsed = append(ingestersUsed, ingester)
}

return concurrency.ForEachJobMergeResults(ctx, p.ingestersUsed, 0, func(ctx context.Context, job PartitionIngesterUsed) ([]responseFromIngesters, error) {
return concurrency.ForEachJobMergeResults(ctx, ingestersUsed, 0, func(ctx context.Context, job PartitionIngesterUsed) ([]responseFromIngesters, error) {
resp, err := f(ctx, job.client)
if err != nil {
return nil, err
Expand All @@ -139,13 +149,17 @@ func (p *PartitionContext) forQueriedIngesters(ctx context.Context, f func(conte
// NewPartitionContext creates a new partition context
// This is used to track which ingesters were used in the query and reuse the same ingesters for consecutive queries
func NewPartitionContext(ctx context.Context) context.Context {
return context.WithValue(ctx, partitionCtxKey, &PartitionContext{})
return context.WithValue(ctx, partitionCtxKey, &PartitionContext{
ingestersUsed: make(map[string]PartitionIngesterUsed),
})
}

func ExtractPartitionContext(ctx context.Context) *PartitionContext {
v, ok := ctx.Value(partitionCtxKey).(*PartitionContext)
if !ok {
return &PartitionContext{}
return &PartitionContext{
ingestersUsed: make(map[string]PartitionIngesterUsed),
}
}
return v
}
Expand Down Expand Up @@ -197,15 +211,15 @@ func (q *IngesterQuerier) forGivenIngesters(ctx context.Context, replicationSet
if err != nil {
return responseFromIngesters{addr: ingester.Addr}, err
}
ExtractPartitionContext(ctx).AddClient(client.(logproto.QuerierClient), ingester.Addr)
resp, err := f(ctx, client.(logproto.QuerierClient))
if err != nil {
return responseFromIngesters{addr: ingester.Addr}, err
}

ExtractPartitionContext(ctx).AddClient(client.(logproto.QuerierClient), ingester.Addr)
return responseFromIngesters{ingester.Addr, resp}, nil
}, func(responseFromIngesters) {
// Nothing to do
}, func(cleanup responseFromIngesters) {
ExtractPartitionContext(ctx).RemoveClient(cleanup.addr)
})
if err != nil {
return nil, err
Expand Down
Loading