Skip to content

Commit bef2043

Browse files
authored
perf(approx_topk): Reduce memory usage of HyperLogLog in approx_topk. (#15559)
The count min sketch data structure backing the new approx_topk aggregation uses HyperLogLog (HLL) to track the actual cardinality of the aggregated vector. We were using the sparse version of the HLL. However, that resulted in memory and allocation overhead.
1 parent 5f476a3 commit bef2043

File tree

4 files changed

+23
-5
lines changed

4 files changed

+23
-5
lines changed

‎pkg/logql/count_min_sketch.go

+5-2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ package logql
33
import (
44
"container/heap"
55
"fmt"
6+
"slices"
7+
"strings"
68

79
"github.com/axiomhq/hyperloglog"
810
"github.com/cespare/xxhash/v2"
@@ -187,16 +189,17 @@ func NewHeapCountMinSketchVector(ts int64, metricsLength, maxLabels int) HeapCou
187189
}
188190

189191
func (v *HeapCountMinSketchVector) Add(metric labels.Labels, value float64) {
192+
slices.SortFunc(metric, func(a, b labels.Label) int { return strings.Compare(a.Name, b.Name) })
190193
v.buffer = metric.Bytes(v.buffer)
191194

192195
v.F.Add(v.buffer, value)
193196

194-
// Add our metric if we haven't seen it
195-
196197
// TODO(karsten): There is a chance that the ids match but not the labels due to hash collision. Ideally there's
197198
// an else block the compares the series labels. However, that's not trivial. Besides, instance.Series has the
198199
// same issue in its deduping logic.
199200
id := xxhash.Sum64(v.buffer)
201+
202+
// Add our metric if we haven't seen it
200203
if _, ok := v.observed[id]; !ok {
201204
heap.Push(v, metric)
202205
v.observed[id] = struct{}{}

‎pkg/logql/log/labels.go

+4-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@ package log
22

33
import (
44
"fmt"
5+
"slices"
56
"sort"
7+
"strings"
68
"sync"
79

810
"github.com/prometheus/prometheus/model/labels"
@@ -585,7 +587,8 @@ func (b *LabelsBuilder) LabelsResult() LabelsResult {
585587

586588
// Get all labels at once and sort them
587589
b.buf = b.UnsortedLabels(b.buf)
588-
sort.Sort(b.buf)
590+
// sort.Sort(b.buf)
591+
slices.SortFunc(b.buf, func(a, b labels.Label) int { return strings.Compare(a.Name, b.Name) })
589592
hash := b.hasher.Hash(b.buf)
590593

591594
if cached, ok := b.resultCache[hash]; ok {

‎pkg/logql/log/parser_test.go

+13-1
Original file line numberDiff line numberDiff line change
@@ -241,25 +241,32 @@ func (p *fakeParseHints) ShouldExtract(key string) bool {
241241
p.checkCount++
242242
return key == p.label || p.extractAll
243243
}
244+
244245
func (p *fakeParseHints) ShouldExtractPrefix(prefix string) bool {
245246
return prefix == p.label || p.extractAll
246247
}
248+
247249
func (p *fakeParseHints) NoLabels() bool {
248250
return false
249251
}
252+
250253
func (p *fakeParseHints) RecordExtracted(_ string) {
251254
p.count++
252255
}
256+
253257
func (p *fakeParseHints) AllRequiredExtracted() bool {
254258
return !p.extractAll && p.count == 1
255259
}
260+
256261
func (p *fakeParseHints) Reset() {
257262
p.checkCount = 0
258263
p.count = 0
259264
}
265+
260266
func (p *fakeParseHints) PreserveError() bool {
261267
return false
262268
}
269+
263270
func (p *fakeParseHints) ShouldContinueParsingLine(_ string, _ *LabelsBuilder) bool {
264271
return p.keepGoing
265272
}
@@ -656,30 +663,36 @@ func Benchmark_Parser(b *testing.B) {
656663
b.Run(tt.name, func(b *testing.B) {
657664
line := []byte(tt.line)
658665
b.Run("no labels hints", func(b *testing.B) {
666+
b.ReportAllocs()
659667
builder := NewBaseLabelsBuilder().ForLabels(lbs, lbs.Hash())
660668
for n := 0; n < b.N; n++ {
661669
builder.Reset()
662670
_, _ = tt.s.Process(0, line, builder)
671+
builder.LabelsResult()
663672
}
664673
})
665674

666675
b.Run("labels hints", func(b *testing.B) {
676+
b.ReportAllocs()
667677
builder := NewBaseLabelsBuilder().ForLabels(lbs, lbs.Hash())
668678
builder.parserKeyHints = NewParserHint(tt.LabelParseHints, tt.LabelParseHints, false, false, "", nil)
669679

670680
for n := 0; n < b.N; n++ {
671681
builder.Reset()
672682
_, _ = tt.s.Process(0, line, builder)
683+
builder.LabelsResult()
673684
}
674685
})
675686

676687
b.Run("inline stages", func(b *testing.B) {
688+
b.ReportAllocs()
677689
stages := []Stage{NewStringLabelFilter(tt.LabelFilterParseHint)}
678690
builder := NewBaseLabelsBuilder().ForLabels(lbs, lbs.Hash())
679691
builder.parserKeyHints = NewParserHint(nil, nil, false, false, ", nil", stages)
680692
for n := 0; n < b.N; n++ {
681693
builder.Reset()
682694
_, _ = tt.s.Process(0, line, builder)
695+
builder.LabelsResult()
683696
}
684697
})
685698
})
@@ -1251,7 +1264,6 @@ func TestXExpressionParserFailures(t *testing.T) {
12511264
},
12521265
}
12531266
for _, tt := range tests {
1254-
12551267
t.Run(tt.name, func(t *testing.T) {
12561268
_, err := NewLogfmtExpressionParser([]LabelExtractionExpr{tt.expression}, false)
12571269

‎pkg/logql/sketch/cms.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ func NewCountMinSketch(w, d uint32) (*CountMinSketch, error) {
1919
Depth: d,
2020
Width: w,
2121
Counters: make2dslice(w, d),
22-
HyperLogLog: hyperloglog.New16(),
22+
HyperLogLog: hyperloglog.New16NoSparse(),
2323
}, nil
2424
}
2525

0 commit comments

Comments
 (0)