@@ -35,6 +35,10 @@ import (
35
35
util_log "github.com/grafana/loki/v3/pkg/util/log"
36
36
)
37
37
38
+ var defaultQuorumConfig = ring.DoUntilQuorumConfig {
39
+ // Intentionally empty: the zero-value config is used, so MinimizeRequests stays false.
40
+ }
41
+
38
42
type responseFromIngesters struct {
39
43
addr string
40
44
response interface {}
@@ -79,7 +83,8 @@ func newIngesterQuerier(querierConfig Config, clientCfg client.Config, ring ring
79
83
}
80
84
81
85
// forAllIngesters runs f, in parallel, for all ingesters
82
- func (q * IngesterQuerier ) forAllIngesters (ctx context.Context , f func (context.Context , logproto.QuerierClient ) (interface {}, error )) ([]responseFromIngesters , error ) {
86
+ // waitForAllResponses can be used to require results from all ingesters in the replication set. If it is false, the call returns as soon as we have a quorum by zone. It only takes effect when QueryPartitionIngesters is enabled; otherwise it is ignored.
87
+ func (q * IngesterQuerier ) forAllIngesters (ctx context.Context , waitForAllResponses bool , f func (context.Context , logproto.QuerierClient ) (interface {}, error )) ([]responseFromIngesters , error ) {
83
88
if q .querierConfig .QueryPartitionIngesters {
84
89
tenantID , err := user .ExtractOrgID (ctx )
85
90
if err != nil {
@@ -94,36 +99,36 @@ func (q *IngesterQuerier) forAllIngesters(ctx context.Context, f func(context.Co
94
99
if err != nil {
95
100
return nil , err
96
101
}
97
- return q .forGivenIngesterSets (ctx , replicationSets , f )
102
+ return q .forGivenIngesterSets (ctx , waitForAllResponses , replicationSets , f )
98
103
}
99
104
100
105
replicationSet , err := q .ring .GetReplicationSetForOperation (ring .Read )
101
106
if err != nil {
102
107
return nil , err
103
108
}
104
109
105
- return q .forGivenIngesters (ctx , replicationSet , defaultQuorumConfig () , f )
110
+ return q .forGivenIngesters (ctx , replicationSet , defaultQuorumConfig , f )
106
111
}
107
112
108
113
// forGivenIngesterSets runs f, in parallel, for given ingester sets
109
- func (q * IngesterQuerier ) forGivenIngesterSets (ctx context.Context , replicationSet []ring.ReplicationSet , f func (context.Context , logproto.QuerierClient ) (interface {}, error )) ([]responseFromIngesters , error ) {
110
- // Enable minimize requests so we initially query a single ingester per replication set, as each replication-set is one partition.
114
+ // waitForAllResponses param can be used to require results from all ingesters in all replication sets. If this is set to false, the call will return as soon as we have a quorum by zone.
115
+ func (q * IngesterQuerier ) forGivenIngesterSets (ctx context.Context , waitForAllResponses bool , replicationSet []ring.ReplicationSet , f func (context.Context , logproto.QuerierClient ) (interface {}, error )) ([]responseFromIngesters , error ) {
116
+ // Enable minimize requests unless waitForAllResponses is set, so we initially query a single ingester per replication set, as each replication set is one partition.
111
117
// Ingesters must supply zone information for this to have an effect.
112
118
config := ring.DoUntilQuorumConfig {
113
- MinimizeRequests : true ,
119
+ MinimizeRequests : ! waitForAllResponses ,
114
120
}
115
121
return concurrency .ForEachJobMergeResults [ring.ReplicationSet , responseFromIngesters ](ctx , replicationSet , 0 , func (ctx context.Context , set ring.ReplicationSet ) ([]responseFromIngesters , error ) {
122
+ if waitForAllResponses {
123
+ // Tell the ring we need to return all responses from all zones
124
+ set .MaxErrors = 0
125
+ set .MaxUnavailableZones = 0
126
+ }
116
127
return q .forGivenIngesters (ctx , set , config , f )
117
128
})
118
129
}
119
130
120
- func defaultQuorumConfig () ring.DoUntilQuorumConfig {
121
- return ring.DoUntilQuorumConfig {
122
- // Nothing here
123
- }
124
- }
125
-
126
- // forGivenIngesters runs f, in parallel, for given ingesters
131
+ // forGivenIngesters runs f, in parallel, for given ingesters until a quorum of responses is received
127
132
func (q * IngesterQuerier ) forGivenIngesters (ctx context.Context , replicationSet ring.ReplicationSet , quorumConfig ring.DoUntilQuorumConfig , f func (context.Context , logproto.QuerierClient ) (interface {}, error )) ([]responseFromIngesters , error ) {
128
133
results , err := ring .DoUntilQuorum (ctx , replicationSet , quorumConfig , func (ctx context.Context , ingester * ring.InstanceDesc ) (responseFromIngesters , error ) {
129
134
client , err := q .pool .GetClientFor (ingester .Addr )
@@ -152,7 +157,7 @@ func (q *IngesterQuerier) forGivenIngesters(ctx context.Context, replicationSet
152
157
}
153
158
154
159
func (q * IngesterQuerier ) SelectLogs (ctx context.Context , params logql.SelectLogParams ) ([]iter.EntryIterator , error ) {
155
- resps , err := q .forAllIngesters (ctx , func (_ context.Context , client logproto.QuerierClient ) (interface {}, error ) {
160
+ resps , err := q .forAllIngesters (ctx , false , func (_ context.Context , client logproto.QuerierClient ) (interface {}, error ) {
156
161
stats .FromContext (ctx ).AddIngesterReached (1 )
157
162
return client .Query (ctx , params .QueryRequest )
158
163
})
@@ -168,7 +173,7 @@ func (q *IngesterQuerier) SelectLogs(ctx context.Context, params logql.SelectLog
168
173
}
169
174
170
175
func (q * IngesterQuerier ) SelectSample (ctx context.Context , params logql.SelectSampleParams ) ([]iter.SampleIterator , error ) {
171
- resps , err := q .forAllIngesters (ctx , func (_ context.Context , client logproto.QuerierClient ) (interface {}, error ) {
176
+ resps , err := q .forAllIngesters (ctx , false , func (_ context.Context , client logproto.QuerierClient ) (interface {}, error ) {
172
177
stats .FromContext (ctx ).AddIngesterReached (1 )
173
178
return client .QuerySample (ctx , params .SampleQueryRequest )
174
179
})
@@ -184,7 +189,7 @@ func (q *IngesterQuerier) SelectSample(ctx context.Context, params logql.SelectS
184
189
}
185
190
186
191
func (q * IngesterQuerier ) Label (ctx context.Context , req * logproto.LabelRequest ) ([][]string , error ) {
187
- resps , err := q .forAllIngesters (ctx , func (ctx context.Context , client logproto.QuerierClient ) (interface {}, error ) {
192
+ resps , err := q .forAllIngesters (ctx , false , func (ctx context.Context , client logproto.QuerierClient ) (interface {}, error ) {
188
193
return client .Label (ctx , req )
189
194
})
190
195
if err != nil {
@@ -200,7 +205,7 @@ func (q *IngesterQuerier) Label(ctx context.Context, req *logproto.LabelRequest)
200
205
}
201
206
202
207
func (q * IngesterQuerier ) Tail (ctx context.Context , req * logproto.TailRequest ) (map [string ]logproto.Querier_TailClient , error ) {
203
- resps , err := q .forAllIngesters (ctx , func (_ context.Context , client logproto.QuerierClient ) (interface {}, error ) {
208
+ resps , err := q .forAllIngesters (ctx , false , func (_ context.Context , client logproto.QuerierClient ) (interface {}, error ) {
204
209
return client .Tail (ctx , req )
205
210
})
206
211
if err != nil {
@@ -249,7 +254,7 @@ func (q *IngesterQuerier) TailDisconnectedIngesters(ctx context.Context, req *lo
249
254
}
250
255
251
256
// Instance a tail client for each ingester to re(connect)
252
- reconnectClients , err := q .forGivenIngesters (ctx , ring.ReplicationSet {Instances : reconnectIngesters }, defaultQuorumConfig () , func (_ context.Context , client logproto.QuerierClient ) (interface {}, error ) {
257
+ reconnectClients , err := q .forGivenIngesters (ctx , ring.ReplicationSet {Instances : reconnectIngesters }, defaultQuorumConfig , func (_ context.Context , client logproto.QuerierClient ) (interface {}, error ) {
253
258
return client .Tail (ctx , req )
254
259
})
255
260
if err != nil {
@@ -265,7 +270,7 @@ func (q *IngesterQuerier) TailDisconnectedIngesters(ctx context.Context, req *lo
265
270
}
266
271
267
272
func (q * IngesterQuerier ) Series (ctx context.Context , req * logproto.SeriesRequest ) ([][]logproto.SeriesIdentifier , error ) {
268
- resps , err := q .forAllIngesters (ctx , func (ctx context.Context , client logproto.QuerierClient ) (interface {}, error ) {
273
+ resps , err := q .forAllIngesters (ctx , false , func (ctx context.Context , client logproto.QuerierClient ) (interface {}, error ) {
269
274
return client .Series (ctx , req )
270
275
})
271
276
if err != nil {
@@ -297,7 +302,7 @@ func (q *IngesterQuerier) TailersCount(ctx context.Context) ([]uint32, error) {
297
302
return nil , httpgrpc .Errorf (http .StatusInternalServerError , "no active ingester found" )
298
303
}
299
304
300
- responses , err := q .forGivenIngesters (ctx , replicationSet , defaultQuorumConfig () , func (ctx context.Context , querierClient logproto.QuerierClient ) (interface {}, error ) {
305
+ responses , err := q .forGivenIngesters (ctx , replicationSet , defaultQuorumConfig , func (ctx context.Context , querierClient logproto.QuerierClient ) (interface {}, error ) {
301
306
resp , err := querierClient .TailersCount (ctx , & logproto.TailersCountRequest {})
302
307
if err != nil {
303
308
return nil , err
@@ -320,7 +325,9 @@ func (q *IngesterQuerier) TailersCount(ctx context.Context) ([]uint32, error) {
320
325
}
321
326
322
327
func (q * IngesterQuerier ) GetChunkIDs (ctx context.Context , from , through model.Time , matchers ... * labels.Matcher ) ([]string , error ) {
323
- resps , err := q .forAllIngesters (ctx , func (ctx context.Context , querierClient logproto.QuerierClient ) (interface {}, error ) {
328
+ // We must wait for all responses when using partition-ingesters to avoid a race between Query and GetChunkIDs calls.
329
+ // This occurs if we call Query on an ingester after a recent flush and then call GetChunkIDs on a different, unflushed ingester in the same partition.
330
+ resps , err := q .forAllIngesters (ctx , q .querierConfig .QueryPartitionIngesters , func (ctx context.Context , querierClient logproto.QuerierClient ) (interface {}, error ) {
324
331
return querierClient .GetChunkIDs (ctx , & logproto.GetChunkIDsRequest {
325
332
Matchers : convertMatchersToString (matchers ),
326
333
Start : from .Time (),
@@ -340,7 +347,7 @@ func (q *IngesterQuerier) GetChunkIDs(ctx context.Context, from, through model.T
340
347
}
341
348
342
349
func (q * IngesterQuerier ) Stats (ctx context.Context , _ string , from , through model.Time , matchers ... * labels.Matcher ) (* index_stats.Stats , error ) {
343
- resps , err := q .forAllIngesters (ctx , func (ctx context.Context , querierClient logproto.QuerierClient ) (interface {}, error ) {
350
+ resps , err := q .forAllIngesters (ctx , false , func (ctx context.Context , querierClient logproto.QuerierClient ) (interface {}, error ) {
344
351
return querierClient .GetStats (ctx , & logproto.IndexStatsRequest {
345
352
From : from ,
346
353
Through : through ,
@@ -371,7 +378,7 @@ func (q *IngesterQuerier) Volume(ctx context.Context, _ string, from, through mo
371
378
matcherString = syntax .MatchersString (matchers )
372
379
}
373
380
374
- resps , err := q .forAllIngesters (ctx , func (ctx context.Context , querierClient logproto.QuerierClient ) (interface {}, error ) {
381
+ resps , err := q .forAllIngesters (ctx , false , func (ctx context.Context , querierClient logproto.QuerierClient ) (interface {}, error ) {
375
382
return querierClient .GetVolume (ctx , & logproto.VolumeRequest {
376
383
From : from ,
377
384
Through : through ,
@@ -400,7 +407,7 @@ func (q *IngesterQuerier) Volume(ctx context.Context, _ string, from, through mo
400
407
}
401
408
402
409
func (q * IngesterQuerier ) DetectedLabel (ctx context.Context , req * logproto.DetectedLabelsRequest ) (* logproto.LabelToValuesResponse , error ) {
403
- ingesterResponses , err := q .forAllIngesters (ctx , func (ctx context.Context , client logproto.QuerierClient ) (interface {}, error ) {
410
+ ingesterResponses , err := q .forAllIngesters (ctx , false , func (ctx context.Context , client logproto.QuerierClient ) (interface {}, error ) {
404
411
return client .GetDetectedLabels (ctx , req )
405
412
})
406
413
0 commit comments