 	"net/http"
 	"slices"
 	"strings"
+	"sync"
 	"time"

 	"github.com/go-kit/log"
@@ -82,10 +83,91 @@ func newIngesterQuerier(querierConfig Config, clientCfg client.Config, ring ring
 	return &iq, nil
 }

+type ctxKeyType string
+
+const (
+	partitionCtxKey ctxKeyType = "partitionCtx"
+)
+
+type PartitionContext struct {
+	isPartitioned bool
+	ingestersUsed map[string]PartitionIngesterUsed
+	mtx           sync.Mutex
+}
+
+type PartitionIngesterUsed struct {
+	client logproto.QuerierClient
+	addr   string
+}
+
+func (p *PartitionContext) AddClient(client logproto.QuerierClient, addr string) {
+	p.mtx.Lock()
+	defer p.mtx.Unlock()
+	if !p.isPartitioned {
+		return
+	}
+	p.ingestersUsed[addr] = PartitionIngesterUsed{client: client, addr: addr}
+}
+
+func (p *PartitionContext) RemoveClient(addr string) {
+	p.mtx.Lock()
+	defer p.mtx.Unlock()
+	if !p.isPartitioned {
+		return
+	}
+	delete(p.ingestersUsed, addr)
+}
+
+func (p *PartitionContext) SetIsPartitioned(isPartitioned bool) {
+	p.mtx.Lock()
+	defer p.mtx.Unlock()
+	p.isPartitioned = isPartitioned
+}
+
+func (p *PartitionContext) IsPartitioned() bool {
+	return p.isPartitioned
+}
+
+func (p *PartitionContext) forQueriedIngesters(ctx context.Context, f func(context.Context, logproto.QuerierClient) (interface{}, error)) ([]responseFromIngesters, error) {
+	p.mtx.Lock()
+	defer p.mtx.Unlock()
+
+	ingestersUsed := make([]PartitionIngesterUsed, 0, len(p.ingestersUsed))
+	for _, ingester := range p.ingestersUsed {
+		ingestersUsed = append(ingestersUsed, ingester)
+	}
+
+	return concurrency.ForEachJobMergeResults(ctx, ingestersUsed, 0, func(ctx context.Context, job PartitionIngesterUsed) ([]responseFromIngesters, error) {
+		resp, err := f(ctx, job.client)
+		if err != nil {
+			return nil, err
+		}
+		return []responseFromIngesters{{addr: job.addr, response: resp}}, nil
+	})
+}
+
+// NewPartitionContext creates a new partition context
+// This is used to track which ingesters were used in the query and reuse the same ingesters for consecutive queries
+func NewPartitionContext(ctx context.Context) context.Context {
+	return context.WithValue(ctx, partitionCtxKey, &PartitionContext{
+		ingestersUsed: make(map[string]PartitionIngesterUsed),
+	})
+}
+
+func ExtractPartitionContext(ctx context.Context) *PartitionContext {
+	v, ok := ctx.Value(partitionCtxKey).(*PartitionContext)
+	if !ok {
+		return &PartitionContext{
+			ingestersUsed: make(map[string]PartitionIngesterUsed),
+		}
+	}
+	return v
+}
+
 // forAllIngesters runs f, in parallel, for all ingesters
-// waitForAllResponses param can be used to require results from all ingesters in the replication set. If this is set to false, the call will return as soon as we have a quorum by zone. Only valid for partition-ingesters.
-func (q *IngesterQuerier) forAllIngesters(ctx context.Context, waitForAllResponses bool, f func(context.Context, logproto.QuerierClient) (interface{}, error)) ([]responseFromIngesters, error) {
+func (q *IngesterQuerier) forAllIngesters(ctx context.Context, f func(context.Context, logproto.QuerierClient) (interface{}, error)) ([]responseFromIngesters, error) {
 	if q.querierConfig.QueryPartitionIngesters {
+		ExtractPartitionContext(ctx).SetIsPartitioned(true)
 		tenantID, err := user.ExtractOrgID(ctx)
 		if err != nil {
 			return nil, err
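
For orientation, the sketch below shows roughly how a caller might thread the new PartitionContext through two consecutive ingester calls, so that the follow-up chunk-ID lookup is pinned to the ingesters that served the log query. It is not part of the change above: chunkIDsForQuery and its parameter list are invented for illustration and assume the same package as this file; only NewPartitionContext, SelectLogs, and GetChunkIDs come from the code in the diff.

// chunkIDsForQuery is a hypothetical helper (not part of this change) showing
// the intended call pattern for the partition context.
func chunkIDsForQuery(ctx context.Context, q *IngesterQuerier, params logql.SelectLogParams, from, through model.Time, matchers ...*labels.Matcher) ([]string, error) {
	// Wrap the request context once, before any ingester calls are made.
	ctx = NewPartitionContext(ctx)

	// First call: when partition-ingesters are enabled, forAllIngesters marks
	// the context as partitioned and records every ingester it queried.
	if _, err := q.SelectLogs(ctx, params); err != nil {
		return nil, err
	}

	// Follow-up call: GetChunkIDs sees IsPartitioned() and re-queries exactly
	// the ingesters recorded above instead of fanning out across the ring again.
	return q.GetChunkIDs(ctx, from, through, matchers...)
}
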
@@ -99,7 +181,7 @@ func (q *IngesterQuerier) forAllIngesters(ctx context.Context, waitForAllRespons
 		if err != nil {
 			return nil, err
 		}
-		return q.forGivenIngesterSets(ctx, waitForAllResponses, replicationSets, f)
+		return q.forGivenIngesterSets(ctx, replicationSets, f)
 	}

 	replicationSet, err := q.ring.GetReplicationSetForOperation(ring.Read)
@@ -111,19 +193,13 @@ func (q *IngesterQuerier) forAllIngesters(ctx context.Context, waitForAllRespons
 }

 // forGivenIngesterSets runs f, in parallel, for given ingester sets
-// waitForAllResponses param can be used to require results from all ingesters in all replication sets. If this is set to false, the call will return as soon as we have a quorum by zone.
-func (q *IngesterQuerier) forGivenIngesterSets(ctx context.Context, waitForAllResponses bool, replicationSet []ring.ReplicationSet, f func(context.Context, logproto.QuerierClient) (interface{}, error)) ([]responseFromIngesters, error) {
+func (q *IngesterQuerier) forGivenIngesterSets(ctx context.Context, replicationSet []ring.ReplicationSet, f func(context.Context, logproto.QuerierClient) (interface{}, error)) ([]responseFromIngesters, error) {
 	// Enable minimize requests if we can, so we initially query a single ingester per replication set, as each replication-set is one partition.
 	// Ingesters must supply zone information for this to have an effect.
 	config := ring.DoUntilQuorumConfig{
-		MinimizeRequests: !waitForAllResponses,
+		MinimizeRequests: true,
 	}
 	return concurrency.ForEachJobMergeResults[ring.ReplicationSet, responseFromIngesters](ctx, replicationSet, 0, func(ctx context.Context, set ring.ReplicationSet) ([]responseFromIngesters, error) {
-		if waitForAllResponses {
-			// Tell the ring we need to return all responses from all zones
-			set.MaxErrors = 0
-			set.MaxUnavailableZones = 0
-		}
 		return q.forGivenIngesters(ctx, set, config, f)
 	})
 }
@@ -135,17 +211,16 @@ func (q *IngesterQuerier) forGivenIngesters(ctx context.Context, replicationSet
 		if err != nil {
 			return responseFromIngesters{addr: ingester.Addr}, err
 		}
-
 		resp, err := f(ctx, client.(logproto.QuerierClient))
 		if err != nil {
 			return responseFromIngesters{addr: ingester.Addr}, err
 		}

+		ExtractPartitionContext(ctx).AddClient(client.(logproto.QuerierClient), ingester.Addr)
 		return responseFromIngesters{ingester.Addr, resp}, nil
-	}, func(responseFromIngesters) {
-		// Nothing to do
+	}, func(cleanup responseFromIngesters) {
+		ExtractPartitionContext(ctx).RemoveClient(cleanup.addr)
 	})
-
 	if err != nil {
 		return nil, err
 	}
@@ -157,7 +232,7 @@ func (q *IngesterQuerier) forGivenIngesters(ctx context.Context, replicationSet
 }

 func (q *IngesterQuerier) SelectLogs(ctx context.Context, params logql.SelectLogParams) ([]iter.EntryIterator, error) {
-	resps, err := q.forAllIngesters(ctx, false, func(_ context.Context, client logproto.QuerierClient) (interface{}, error) {
+	resps, err := q.forAllIngesters(ctx, func(_ context.Context, client logproto.QuerierClient) (interface{}, error) {
 		stats.FromContext(ctx).AddIngesterReached(1)
 		return client.Query(ctx, params.QueryRequest)
 	})
@@ -173,7 +248,7 @@ func (q *IngesterQuerier) SelectLogs(ctx context.Context, params logql.SelectLog
 }

 func (q *IngesterQuerier) SelectSample(ctx context.Context, params logql.SelectSampleParams) ([]iter.SampleIterator, error) {
-	resps, err := q.forAllIngesters(ctx, false, func(_ context.Context, client logproto.QuerierClient) (interface{}, error) {
+	resps, err := q.forAllIngesters(ctx, func(_ context.Context, client logproto.QuerierClient) (interface{}, error) {
 		stats.FromContext(ctx).AddIngesterReached(1)
 		return client.QuerySample(ctx, params.SampleQueryRequest)
 	})
@@ -189,7 +264,7 @@ func (q *IngesterQuerier) SelectSample(ctx context.Context, params logql.SelectS
 }

 func (q *IngesterQuerier) Label(ctx context.Context, req *logproto.LabelRequest) ([][]string, error) {
-	resps, err := q.forAllIngesters(ctx, false, func(ctx context.Context, client logproto.QuerierClient) (interface{}, error) {
+	resps, err := q.forAllIngesters(ctx, func(ctx context.Context, client logproto.QuerierClient) (interface{}, error) {
 		return client.Label(ctx, req)
 	})
 	if err != nil {
@@ -205,7 +280,7 @@ func (q *IngesterQuerier) Label(ctx context.Context, req *logproto.LabelRequest)
 }

 func (q *IngesterQuerier) Tail(ctx context.Context, req *logproto.TailRequest) (map[string]logproto.Querier_TailClient, error) {
-	resps, err := q.forAllIngesters(ctx, false, func(_ context.Context, client logproto.QuerierClient) (interface{}, error) {
+	resps, err := q.forAllIngesters(ctx, func(_ context.Context, client logproto.QuerierClient) (interface{}, error) {
 		return client.Tail(ctx, req)
 	})
 	if err != nil {
@@ -270,7 +345,7 @@ func (q *IngesterQuerier) TailDisconnectedIngesters(ctx context.Context, req *lo
 }

 func (q *IngesterQuerier) Series(ctx context.Context, req *logproto.SeriesRequest) ([][]logproto.SeriesIdentifier, error) {
-	resps, err := q.forAllIngesters(ctx, false, func(ctx context.Context, client logproto.QuerierClient) (interface{}, error) {
+	resps, err := q.forAllIngesters(ctx, func(ctx context.Context, client logproto.QuerierClient) (interface{}, error) {
 		return client.Series(ctx, req)
 	})
 	if err != nil {
@@ -325,15 +400,22 @@ func (q *IngesterQuerier) TailersCount(ctx context.Context) ([]uint32, error) {
 }

 func (q *IngesterQuerier) GetChunkIDs(ctx context.Context, from, through model.Time, matchers ...*labels.Matcher) ([]string, error) {
-	// We must wait for all responses when using partition-ingesters to avoid a race between Query and GetChunkIDs calls.
-	// This occurs if call Query on an ingester after a recent flush then call GetChunkIDs on a different, unflushed ingester in the same partition.
-	resps, err := q.forAllIngesters(ctx, q.querierConfig.QueryPartitionIngesters, func(ctx context.Context, querierClient logproto.QuerierClient) (interface{}, error) {
+	ingesterQueryFn := q.forAllIngesters
+
+	partitionCtx := ExtractPartitionContext(ctx)
+	if partitionCtx.IsPartitioned() {
+		// We need to query the same ingesters as the previous query
+		ingesterQueryFn = partitionCtx.forQueriedIngesters
+	}
+
+	resps, err := ingesterQueryFn(ctx, func(ctx context.Context, querierClient logproto.QuerierClient) (interface{}, error) {
 		return querierClient.GetChunkIDs(ctx, &logproto.GetChunkIDsRequest{
 			Matchers: convertMatchersToString(matchers),
 			Start:    from.Time(),
 			End:      through.Time(),
 		})
 	})
+
 	if err != nil {
 		return nil, err
 	}
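
One consequence of the hunk above, made explicit: when no PartitionContext was installed in the context (or partition-ingesters are disabled and SetIsPartitioned is never called), ExtractPartitionContext returns a fresh, unpartitioned value, AddClient and RemoveClient become no-ops, and GetChunkIDs keeps the regular forAllIngesters fan-out. The following test-style sketch of that default behavior is not part of the change; the test name is hypothetical and it assumes the testify require package in a same-package test file.

func TestExtractPartitionContextDefaultsToUnpartitioned(t *testing.T) {
	ctx := context.Background()

	// Without NewPartitionContext the extracted value is not partitioned.
	pc := ExtractPartitionContext(ctx)
	require.False(t, pc.IsPartitioned())

	// Recording a client is therefore a no-op and nothing is pinned for reuse.
	pc.AddClient(nil, "ingester-1")
	require.Empty(t, pc.ingestersUsed)
}
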
@@ -347,14 +429,13 @@ func (q *IngesterQuerier) GetChunkIDs(ctx context.Context, from, through model.T
 }

 func (q *IngesterQuerier) Stats(ctx context.Context, _ string, from, through model.Time, matchers ...*labels.Matcher) (*index_stats.Stats, error) {
-	resps, err := q.forAllIngesters(ctx, false, func(ctx context.Context, querierClient logproto.QuerierClient) (interface{}, error) {
+	resps, err := q.forAllIngesters(ctx, func(ctx context.Context, querierClient logproto.QuerierClient) (interface{}, error) {
 		return querierClient.GetStats(ctx, &logproto.IndexStatsRequest{
 			From:     from,
 			Through:  through,
 			Matchers: syntax.MatchersString(matchers),
 		})
 	})
-
 	if err != nil {
 		if isUnimplementedCallError(err) {
 			// Handle communication with older ingesters gracefully
@@ -378,7 +459,7 @@ func (q *IngesterQuerier) Volume(ctx context.Context, _ string, from, through mo
 		matcherString = syntax.MatchersString(matchers)
 	}

-	resps, err := q.forAllIngesters(ctx, false, func(ctx context.Context, querierClient logproto.QuerierClient) (interface{}, error) {
+	resps, err := q.forAllIngesters(ctx, func(ctx context.Context, querierClient logproto.QuerierClient) (interface{}, error) {
 		return querierClient.GetVolume(ctx, &logproto.VolumeRequest{
 			From: from,
 			Through: through,
@@ -388,7 +469,6 @@ func (q *IngesterQuerier) Volume(ctx context.Context, _ string, from, through mo
 			AggregateBy: aggregateBy,
 		})
 	})
-
 	if err != nil {
 		if isUnimplementedCallError(err) {
 			// Handle communication with older ingesters gracefully
@@ -407,10 +487,9 @@ func (q *IngesterQuerier) Volume(ctx context.Context, _ string, from, through mo
 }

 func (q *IngesterQuerier) DetectedLabel(ctx context.Context, req *logproto.DetectedLabelsRequest) (*logproto.LabelToValuesResponse, error) {
-	ingesterResponses, err := q.forAllIngesters(ctx, false, func(ctx context.Context, client logproto.QuerierClient) (interface{}, error) {
+	ingesterResponses, err := q.forAllIngesters(ctx, func(ctx context.Context, client logproto.QuerierClient) (interface{}, error) {
 		return client.GetDetectedLabels(ctx, req)
 	})
-
 	if err != nil {
 		level.Error(q.logger).Log("msg", "error getting detected labels", "err", err)
 		return nil, err