Skip to content

Commit 215f994

Browse files
authored
fix(metric_aggregation): Fix duplicate metrics registration (#15142)
1 parent 716d54e commit 215f994

File tree

4 files changed

+91
-100
lines changed

4 files changed

+91
-100
lines changed

‎pkg/pattern/aggregation/metrics.go

+83-92
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,19 @@
11
package aggregation
22

33
import (
4+
"sync"
5+
46
"github.com/prometheus/client_golang/prometheus"
57
"github.com/prometheus/client_golang/prometheus/promauto"
68

79
"github.com/grafana/loki/v3/pkg/util/constants"
810
)
911

12+
var (
13+
aggMetrics *Metrics
14+
metricsOnce sync.Once
15+
)
16+
1017
type Metrics struct {
1118
reg prometheus.Registerer
1219

@@ -28,96 +35,80 @@ type Metrics struct {
2835
}
2936

3037
func NewMetrics(r prometheus.Registerer) *Metrics {
31-
var m Metrics
32-
m.reg = r
33-
34-
m = Metrics{
35-
chunks: promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{
36-
Namespace: constants.Loki,
37-
Subsystem: "pattern_ingester",
38-
Name: "metric_chunks",
39-
Help: "The total number of chunks in memory.",
40-
}, []string{"service_name"}),
41-
samples: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
42-
Namespace: constants.Loki,
43-
Subsystem: "pattern_ingester",
44-
Name: "metric_samples",
45-
Help: "The total number of samples in memory.",
46-
}, []string{"service_name"}),
47-
pushErrors: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
48-
Namespace: constants.Loki,
49-
Subsystem: "pattern_ingester",
50-
Name: "push_errors_total",
51-
Help: "Total number of errors when pushing metrics to Loki.",
52-
}, []string{"tenant_id", "error_type"}),
53-
54-
pushRetries: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
55-
Namespace: constants.Loki,
56-
Subsystem: "pattern_ingester",
57-
Name: "push_retries_total",
58-
Help: "Total number of retries when pushing metrics to Loki.",
59-
}, []string{"tenant_id"}),
60-
61-
pushSuccesses: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
62-
Namespace: constants.Loki,
63-
Subsystem: "pattern_ingester",
64-
Name: "push_successes_total",
65-
Help: "Total number of successful pushes to Loki.",
66-
}, []string{"tenant_id"}),
67-
68-
// Batch metrics
69-
payloadSize: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{
70-
Namespace: constants.Loki,
71-
Subsystem: "pattern_ingester",
72-
Name: "push_payload_bytes",
73-
Help: "Size of push payloads in bytes.",
74-
Buckets: []float64{1024, 4096, 16384, 65536, 262144, 1048576},
75-
}, []string{"tenant_id"}),
76-
77-
streamsPerPush: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{
78-
Namespace: constants.Loki,
79-
Subsystem: "pattern_ingester",
80-
Name: "streams_per_push",
81-
Help: "Number of streams in each push request.",
82-
Buckets: []float64{1, 5, 10, 25, 50, 100, 250, 500, 1000},
83-
}, []string{"tenant_id"}),
84-
85-
entriesPerPush: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{
86-
Namespace: constants.Loki,
87-
Subsystem: "pattern_ingester",
88-
Name: "entries_per_push",
89-
Help: "Number of entries in each push request.",
90-
Buckets: []float64{10, 50, 100, 500, 1000, 5000, 10000},
91-
}, []string{"tenant_id"}),
92-
93-
servicesTracked: promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{
94-
Namespace: constants.Loki,
95-
Subsystem: "pattern_ingester",
96-
Name: "services_tracked",
97-
Help: "Number of unique services being tracked.",
98-
}, []string{"tenant_id"}),
99-
writeTimeout: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
100-
Namespace: constants.Loki,
101-
Subsystem: "pattern_ingester",
102-
Name: "write_timeouts_total",
103-
Help: "Total number of write timeouts.",
104-
}, []string{"tenant_id"}),
105-
}
106-
107-
if m.reg != nil {
108-
m.reg.MustRegister(
109-
m.chunks,
110-
m.samples,
111-
m.pushErrors,
112-
m.pushRetries,
113-
m.pushSuccesses,
114-
m.payloadSize,
115-
m.streamsPerPush,
116-
m.entriesPerPush,
117-
m.servicesTracked,
118-
m.writeTimeout,
119-
)
120-
}
121-
122-
return &m
38+
metricsOnce.Do(func() {
39+
aggMetrics = &Metrics{
40+
chunks: promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{
41+
Namespace: constants.Loki,
42+
Subsystem: "pattern_ingester",
43+
Name: "metric_chunks",
44+
Help: "The total number of chunks in memory.",
45+
}, []string{"service_name"}),
46+
samples: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
47+
Namespace: constants.Loki,
48+
Subsystem: "pattern_ingester",
49+
Name: "metric_samples",
50+
Help: "The total number of samples in memory.",
51+
}, []string{"service_name"}),
52+
pushErrors: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
53+
Namespace: constants.Loki,
54+
Subsystem: "pattern_ingester",
55+
Name: "push_errors_total",
56+
Help: "Total number of errors when pushing metrics to Loki.",
57+
}, []string{"tenant_id", "error_type"}),
58+
59+
pushRetries: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
60+
Namespace: constants.Loki,
61+
Subsystem: "pattern_ingester",
62+
Name: "push_retries_total",
63+
Help: "Total number of retries when pushing metrics to Loki.",
64+
}, []string{"tenant_id"}),
65+
66+
pushSuccesses: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
67+
Namespace: constants.Loki,
68+
Subsystem: "pattern_ingester",
69+
Name: "push_successes_total",
70+
Help: "Total number of successful pushes to Loki.",
71+
}, []string{"tenant_id"}),
72+
73+
// Batch metrics
74+
payloadSize: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{
75+
Namespace: constants.Loki,
76+
Subsystem: "pattern_ingester",
77+
Name: "push_payload_bytes",
78+
Help: "Size of push payloads in bytes.",
79+
Buckets: []float64{1024, 4096, 16384, 65536, 262144, 1048576},
80+
}, []string{"tenant_id"}),
81+
82+
streamsPerPush: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{
83+
Namespace: constants.Loki,
84+
Subsystem: "pattern_ingester",
85+
Name: "streams_per_push",
86+
Help: "Number of streams in each push request.",
87+
Buckets: []float64{1, 5, 10, 25, 50, 100, 250, 500, 1000},
88+
}, []string{"tenant_id"}),
89+
90+
entriesPerPush: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{
91+
Namespace: constants.Loki,
92+
Subsystem: "pattern_ingester",
93+
Name: "entries_per_push",
94+
Help: "Number of entries in each push request.",
95+
Buckets: []float64{10, 50, 100, 500, 1000, 5000, 10000},
96+
}, []string{"tenant_id"}),
97+
98+
servicesTracked: promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{
99+
Namespace: constants.Loki,
100+
Subsystem: "pattern_ingester",
101+
Name: "services_tracked",
102+
Help: "Number of unique services being tracked.",
103+
}, []string{"tenant_id"}),
104+
writeTimeout: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
105+
Namespace: constants.Loki,
106+
Subsystem: "pattern_ingester",
107+
Name: "write_timeouts_total",
108+
Help: "Total number of write timeouts.",
109+
}, []string{"tenant_id"}),
110+
}
111+
})
112+
113+
return aggMetrics
123114
}

‎pkg/pattern/aggregation/push.go

+4-5
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ import (
1717
"github.com/go-kit/log/level"
1818
"github.com/golang/snappy"
1919
"github.com/opentracing/opentracing-go"
20-
"github.com/prometheus/client_golang/prometheus"
2120
"github.com/prometheus/common/config"
2221
"github.com/prometheus/common/model"
2322
"github.com/prometheus/prometheus/model/labels"
@@ -112,7 +111,7 @@ func NewPush(
112111
useTLS bool,
113112
backoffCfg *backoff.Config,
114113
logger log.Logger,
115-
registrer prometheus.Registerer,
114+
metrics *Metrics,
116115
) (*Push, error) {
117116
client, err := config.NewClientFromConfig(cfg, "pattern-ingester-push", config.WithHTTP2Disabled())
118117
if err != nil {
@@ -147,7 +146,7 @@ func NewPush(
147146
entries: entries{
148147
entries: make([]entry, 0),
149148
},
150-
metrics: NewMetrics(registrer),
149+
metrics: metrics,
151150
}
152151

153152
go p.run(pushPeriod)
@@ -284,12 +283,12 @@ func (p *Push) run(pushPeriod time.Duration) {
284283
}
285284

286285
if !backoff.Ongoing() {
287-
level.Error(p.logger).Log("msg", "failed to send entry, retries exhausted, entry will be dropped", "entry", "status", status, "error", err)
286+
level.Error(p.logger).Log("msg", "failed to send entry, retries exhausted, entry will be dropped", "status", status, "error", err)
288287
pushTicker.Reset(pushPeriod)
289288
break
290289
}
291290
level.Warn(p.logger).
292-
Log("msg", "failed to send entry, retrying", "entry", "status", status, "error", err)
291+
Log("msg", "failed to send entry, retrying", "status", status, "error", err)
293292
backoff.Wait()
294293
}
295294

‎pkg/pattern/aggregation/push_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ func Test_Push(t *testing.T) {
5858
false,
5959
&backoff,
6060
log.NewNopLogger(),
61-
nil,
61+
NewMetrics(nil),
6262
)
6363
require.NoError(t, err)
6464
ts, payload := testPayload()
@@ -83,7 +83,7 @@ func Test_Push(t *testing.T) {
8383
"user", "secret",
8484
false,
8585
&backoff,
86-
log.NewNopLogger(), nil,
86+
log.NewNopLogger(), NewMetrics(nil),
8787
)
8888
require.NoError(t, err)
8989
ts, payload := testPayload()

‎pkg/pattern/ingester.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,7 @@ func (i *Ingester) GetOrCreateInstance(instanceID string) (*instance, error) { /
392392

393393
aggCfg := i.cfg.MetricAggregation
394394
if i.limits.MetricAggregationEnabled(instanceID) {
395+
metricAggregationMetrics := aggregation.NewMetrics(i.registerer)
395396
writer, err = aggregation.NewPush(
396397
aggCfg.LokiAddr,
397398
instanceID,
@@ -403,7 +404,7 @@ func (i *Ingester) GetOrCreateInstance(instanceID string) (*instance, error) { /
403404
aggCfg.UseTLS,
404405
&aggCfg.BackoffConfig,
405406
i.logger,
406-
i.registerer,
407+
metricAggregationMetrics,
407408
)
408409
if err != nil {
409410
return nil, err

0 commit comments

Comments
 (0)