Skip to content

Commit ca40254

Browse files
committed
Optimize memory allocations for sites matrix vector stores
By:

* Caching common sites matrix setups (e.g. the single site in single-site setups).
* Adding a fast path to IntSets.HasAnyVector for the common case of a single-vector input.

```
AssembleDeepSiteWithManySections/depth=3/sectionsPerLevel=2/pagesPerSection=100-10   31.62m ± 46%   30.68m ± 42%   ~ (p=0.310 n=6)

                                                                                   │ master.bench │ perfcommon.bench │
                                                                                   │ B/op │ B/op vs base │
AssembleDeepSiteWithManySections/depth=3/sectionsPerLevel=2/pagesPerSection=100-10   31.98Mi ± 0%   31.24Mi ± 0%   -2.30% (p=0.002 n=6)

                                                                                   │ master.bench │ perfcommon.bench │
                                                                                   │ allocs/op │ allocs/op vs base │
AssembleDeepSiteWithManySections/depth=3/sectionsPerLevel=2/pagesPerSection=100-10   460.9k ± 0%   419.9k ± 0%   -8.90% (p=0.002 n=6)
```
1 parent 44b5f13 commit ca40254

File tree

11 files changed

+181
-80
lines changed

11 files changed

+181
-80
lines changed

‎common/paths/pathparser.go‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ type PathParser struct {
5454

5555
// Below gets created on demand.
5656
initOnce sync.Once
57-
sitesMatrixCache *maps.Cache[string, sitesmatrix.VectorStore] // Maps language code to sites matrix vector store.
57+
sitesMatrixCache *maps.Cache[string, sitesmatrix.VectorStore] // Maps language index to sites matrix vector store.
5858
}
5959

6060
func (pp *PathParser) init() {

‎config/allconfig/allconfig.go‎

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -878,6 +878,10 @@ func (c *Configs) Init(logger loggers.Logger) error {
878878
ConfiguredRoles: c.Base.Roles.Config,
879879
}
880880

881+
if err := c.ConfiguredDimensions.Init(); err != nil {
882+
return err
883+
}
884+
881885
intSetsCfg := sitesmatrix.IntSetsConfig{
882886
ApplyDefaults: sitesmatrix.IntSetsConfigApplyDefaultsIfNotSet,
883887
}

‎hugolib/content_map_page_assembler.go‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -575,7 +575,7 @@ func (a *allPagesAssembler) doCreatePages(prefix string) error {
575575
func() error {
576576
if i := len(missingVectorsForHomeOrRootSection); i > 0 {
577577
// Pick one, the rest will be created later.
578-
vec := missingVectorsForHomeOrRootSection.Sample()
578+
vec := missingVectorsForHomeOrRootSection.VectorSample()
579579

580580
kind := kinds.KindSection
581581
if s == "" {

‎hugolib/content_map_page_contentnode.go‎

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -426,12 +426,25 @@ func (n contentNodesMap) sample() contentNode {
426426
}
427427

428428
func (n contentNodesMap) siteVectors() sitesmatrix.VectorIterator {
429-
return sitesmatrix.VectorIteratorFunc(func(yield func(v sitesmatrix.Vector) bool) bool {
430-
for k := range n {
431-
if !yield(k) {
432-
return false
433-
}
429+
return n
430+
}
431+
432+
func (n contentNodesMap) ForEachVector(yield func(v sitesmatrix.Vector) bool) bool {
433+
for v := range n {
434+
if !yield(v) {
435+
return false
434436
}
435-
return true
436-
})
437+
}
438+
return true
439+
}
440+
441+
func (n contentNodesMap) LenVectors() int {
442+
return len(n)
443+
}
444+
445+
func (n contentNodesMap) VectorSample() sitesmatrix.Vector {
446+
for v := range n {
447+
return v
448+
}
449+
panic("no vectors")
437450
}

‎hugolib/roles/roles.go‎

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ type RolesInternal struct {
7676
Sorted []RoleInternal
7777
}
7878

79+
func (r RolesInternal) Len() int {
80+
return len(r.Sorted)
81+
}
82+
7983
func (r RolesInternal) IndexDefault() int {
8084
for i, role := range r.Sorted {
8185
if role.Default {

‎hugolib/sitesmatrix/dimensions.go‎

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,13 @@ func (v1 Vector) HasVector(v2 Vector) bool {
7676
}
7777

7878
func (v1 Vector) HasAnyVector(vp VectorProvider) bool {
79-
if vp.LenVectors() == 0 {
79+
n := vp.LenVectors()
80+
if n == 0 {
8081
return false
8182
}
83+
if n == 1 {
84+
return v1 == vp.VectorSample()
85+
}
8286

8387
return !vp.ForEachVector(func(v2 Vector) bool {
8488
if v1 == v2 {
@@ -144,12 +148,16 @@ func (vs Vectors) ForEachVector(yield func(v Vector) bool) bool {
144148
return true
145149
}
146150

151+
func (vs Vectors) LenVectors() int {
152+
return len(vs)
153+
}
154+
147155
func (vs Vectors) ToVectorStore() VectorStore {
148156
return newVectorStoreMapFromVectors(vs)
149157
}
150158

151-
// Sample returns one of the vectors in the set.
152-
func (vs Vectors) Sample() Vector {
159+
// VectorSample returns one of the vectors in the set.
160+
func (vs Vectors) VectorSample() Vector {
153161
for v := range vs {
154162
return v
155163
}
@@ -161,14 +169,15 @@ type (
161169
// ForEachVector iterates over all vectors in the provider.
162170
// It returns false if the iteration was stopped early.
163171
ForEachVector(func(v Vector) bool) bool
164-
}
165-
)
166172

167-
type VectorIteratorFunc func(func(v Vector) bool) bool
173+
// LenVectors returns the number of vectors in the provider.
174+
LenVectors() int
168175

169-
func (f VectorIteratorFunc) ForEachVector(yield func(v Vector) bool) bool {
170-
return f(yield)
171-
}
176+
// VectorSample returns one of the vectors in the provider, usually the first or the only one.
177+
// This will panic if the provider is empty.
178+
VectorSample() Vector
179+
}
180+
)
172181

173182
// Bools holds boolean values for each dimension in the Hugo build matrix.
174183
type Bools [3]bool
@@ -198,13 +207,6 @@ type VectorProvider interface {
198207
// HasAnyVector returns true if any of the vectors in the provider matches any of the vectors in v.
199208
HasAnyVector(v VectorProvider) bool
200209

201-
// LenVectors returns the number of vectors in the provider.
202-
LenVectors() int
203-
204-
// VectorSample returns one of the vectors in the provider, usually the first or the only one.
205-
// This will panic if the provider is empty.
206-
VectorSample() Vector
207-
208210
// EqualsVector returns true if this provider is equal to the other provider.
209211
EqualsVector(other VectorProvider) bool
210212
}
@@ -227,6 +229,22 @@ type ToVectorStoreProvider interface {
227229
ToVectorStore() VectorStore
228230
}
229231

232+
func VectorIteratorToStore(vi VectorIterator) VectorStore {
233+
switch v := vi.(type) {
234+
case VectorStore:
235+
return v
236+
case ToVectorStoreProvider:
237+
return v.ToVectorStore()
238+
}
239+
240+
vectors := make(Vectors)
241+
vi.ForEachVector(func(v Vector) bool {
242+
vectors[v] = struct{}{}
243+
return true
244+
})
245+
return vectors.ToVectorStore()
246+
}
247+
230248
type weightedVectorStore struct {
231249
VectorStore
232250
weight int

‎hugolib/sitesmatrix/vectorstores.go‎

Lines changed: 61 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,6 @@ func (s *vectorStoreMap) setVector(vec Vector) {
7171
s.sets[vec] = struct{}{}
7272
}
7373

74-
func (s *vectorStoreMap) Ordinal() int {
75-
return 0
76-
}
77-
7874
func (s *vectorStoreMap) KeysSorted() ([]int, []int, []int) {
7975
var k0, k1, k2 []int
8076
for v := range s.sets {
@@ -271,12 +267,45 @@ type ConfiguredDimension interface {
271267
ResolveIndex(string) int
272268
ResolveName(int) string
273269
ForEachIndex() iter.Seq[int]
270+
Len() int
274271
}
275272

273+
// ConfiguredDimensions holds the configured dimensions for the site matrix.
276274
type ConfiguredDimensions struct {
277275
ConfiguredLanguages ConfiguredDimension
278276
ConfiguredVersions ConfiguredDimension
279277
ConfiguredRoles ConfiguredDimension
278+
CommonSitesMatrix CommonSitestMatrix
279+
280+
singleVectorStoreCache *maps.Cache[Vector, *IntSets]
281+
}
282+
283+
func (c *ConfiguredDimensions) IsSingleVector() bool {
284+
return c.ConfiguredLanguages.Len() == 1 && c.ConfiguredRoles.Len() == 1 && c.ConfiguredVersions.Len() == 1
285+
}
286+
287+
// GetOrCreateSingleVectorStore returns a VectorStore for the given vector.
288+
func (c *ConfiguredDimensions) GetOrCreateSingleVectorStore(vec Vector) *IntSets {
289+
store, _ := c.singleVectorStoreCache.GetOrCreate(vec, func() (*IntSets, error) {
290+
is := &IntSets{}
291+
is.setValuesInNilSets(vec, true, true, true)
292+
return is, nil
293+
})
294+
return store
295+
}
296+
297+
func (c *ConfiguredDimensions) Init() error {
298+
c.singleVectorStoreCache = maps.NewCache[Vector, *IntSets]()
299+
b := NewIntSetsBuilder(c).WithDefaultsIfNotSet().Build()
300+
defaultVec := b.VectorSample()
301+
c.singleVectorStoreCache.Set(defaultVec, b)
302+
c.CommonSitesMatrix.DefaultSite = b
303+
304+
return nil
305+
}
306+
307+
type CommonSitestMatrix struct {
308+
DefaultSite VectorStore
280309
}
281310

282311
func (c *ConfiguredDimensions) ResolveNames(v Vector) types.Strings3 {
@@ -317,6 +346,8 @@ type IntSets struct {
317346
h *hashOnce
318347
}
319348

349+
var NilStore *IntSets = nil
350+
320351
type hashOnce struct {
321352
once sync.Once
322353
hash uint64
@@ -363,7 +394,7 @@ func (s *IntSets) Intersects(other *IntSets) bool {
363394
// Complement returns a new VectorStore that contains all vectors in s that are not in any of ss.
364395
func (s *IntSets) Complement(ss ...VectorProvider) VectorStore {
365396
if len(ss) == 0 || (len(ss) == 1 && ss[0] == s) {
366-
return nil
397+
return NilStore
367398
}
368399

369400
for _, v := range ss {
@@ -372,8 +403,7 @@ func (s *IntSets) Complement(ss ...VectorProvider) VectorStore {
372403
continue
373404
}
374405
if vv.IsSuperSet(s) {
375-
var s *IntSets
376-
return s
406+
return NilStore
377407
}
378408
}
379409

@@ -484,6 +514,9 @@ func (s *IntSets) HasLanguage(lang int) bool {
484514
return s.languages.Has(lang)
485515
}
486516

517+
// LenVectors returns the total number of vectors represented by the IntSets.
518+
// This is the Cartesian product of the lengths of the individual sets.
519+
// This will be 0 if s is nil or any of the sets is empty.
487520
func (s *IntSets) LenVectors() int {
488521
if s == nil {
489522
return 0
@@ -526,6 +559,10 @@ func (s *IntSets) HasAnyVector(v VectorProvider) bool {
526559
if s.LenVectors() == 0 || v.LenVectors() == 0 {
527560
return false
528561
}
562+
if v.LenVectors() == 1 {
563+
// Fast path.
564+
return s.HasVector(v.VectorSample())
565+
}
529566

530567
if vs, ok := v.(*IntSets); ok {
531568
// Fast path.
@@ -688,6 +725,14 @@ type IntSetsBuilder struct {
688725

689726
func (b *IntSetsBuilder) Build() *IntSets {
690727
b.s.init()
728+
729+
if b.s.LenVectors() == 1 {
730+
// Cache it or use the existing cached version, which will allow b.s to be GCed.
731+
bb, _ := b.cfg.singleVectorStoreCache.GetOrCreate(b.s.VectorSample(), func() (*IntSets, error) {
732+
return b.s, nil
733+
})
734+
return bb
735+
}
691736
return b.s
692737
}
693738

@@ -889,6 +934,10 @@ type testDimension struct {
889934
names []string
890935
}
891936

937+
func (m testDimension) Len() int {
938+
return len(m.names)
939+
}
940+
892941
func (m testDimension) IndexDefault() int {
893942
return 0
894943
}
@@ -933,9 +982,13 @@ func (m *testDimension) IndexMatch(match predicate.P[string]) (iter.Seq[int], er
933982

934983
// NewTestingDimensions creates a new ConfiguredDimensions for testing.
935984
func NewTestingDimensions(languages, versions, roles []string) *ConfiguredDimensions {
936-
return &ConfiguredDimensions{
985+
c := &ConfiguredDimensions{
937986
ConfiguredLanguages: &testDimension{names: languages},
938987
ConfiguredVersions: &testDimension{names: versions},
939988
ConfiguredRoles: &testDimension{names: roles},
940989
}
990+
if err := c.Init(); err != nil {
991+
panic(err)
992+
}
993+
return c
941994
}

0 commit comments

Comments
 (0)