Skip to content

Conversation

@philippgille
Copy link
Owner

This change requires more allocations per operation (the count is no longer constant), but it 1) is faster and 2) allocates far fewer bytes per operation, so overall it's a win.

goos: linux
goarch: amd64
pkg: github.com/philippgille/chromem-go
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                                    │    after2     │               after3                │
                                    │    sec/op     │    sec/op     vs base               │
Collection_Query_NoContent_100-8      106.32µ ±  2%   90.79µ ±  2%  -14.61% (p=0.002 n=6)
Collection_Query_NoContent_1000-8      490.7µ ±  1%   518.8µ ±  1%   +5.74% (p=0.002 n=6)
Collection_Query_NoContent_5000-8      2.704m ±  5%   2.144m ±  1%  -20.72% (p=0.002 n=6)
Collection_Query_NoContent_25000-8    13.171m ± 11%   9.947m ±  2%  -24.48% (p=0.002 n=6)
Collection_Query_NoContent_100000-8    51.86m ± 13%   39.75m ±  1%  -23.34% (p=0.002 n=6)
Collection_Query_100-8                106.77µ ±  0%   90.99µ ±  1%  -14.78% (p=0.002 n=6)
Collection_Query_1000-8                489.7µ ±  0%   595.2µ ± 13%  +21.55% (p=0.002 n=6)
Collection_Query_5000-8                2.704m ±  6%   2.556m ±  1%   -5.47% (p=0.002 n=6)
Collection_Query_25000-8               13.05m ±  2%   11.66m ±  1%  -10.65% (p=0.002 n=6)
Collection_Query_100000-8              52.07m ±  5%   39.70m ± 12%  -23.76% (p=0.002 n=6)
geomean                                2.492m         2.192m        -12.07%

                                    │    after2     │               after3                │
                                    │     B/op      │     B/op      vs base               │
Collection_Query_NoContent_100-8       6.235Ki ± 0%   5.030Ki ± 0%  -19.32% (p=0.002 n=6)
Collection_Query_NoContent_1000-8      34.74Ki ± 0%   13.24Ki ± 0%  -61.88% (p=0.002 n=6)
Collection_Query_NoContent_5000-8     162.74Ki ± 0%   45.99Ki ± 0%  -71.74% (p=0.002 n=6)
Collection_Query_NoContent_25000-8     794.7Ki ± 0%   206.7Ki ± 0%  -73.99% (p=0.002 n=6)
Collection_Query_NoContent_100000-8   3130.7Ki ± 0%   791.4Ki ± 0%  -74.72% (p=0.002 n=6)
Collection_Query_100-8                 6.234Ki ± 0%   5.033Ki ± 0%  -19.27% (p=0.002 n=6)
Collection_Query_1000-8                34.74Ki ± 0%   13.25Ki ± 0%  -61.87% (p=0.002 n=6)
Collection_Query_5000-8               162.73Ki ± 0%   46.04Ki ± 0%  -71.71% (p=0.002 n=6)
Collection_Query_25000-8               794.7Ki ± 0%   206.8Ki ± 0%  -73.98% (p=0.002 n=6)
Collection_Query_100000-8             3130.8Ki ± 0%   791.4Ki ± 0%  -74.72% (p=0.002 n=6)
geomean                                154.4Ki        54.97Ki       -64.40%

                                    │   after2   │               after3                │
                                    │ allocs/op  │  allocs/op   vs base                │
Collection_Query_NoContent_100-8      41.00 ± 0%    94.00 ± 1%  +129.27% (p=0.002 n=6)
Collection_Query_NoContent_1000-8     41.00 ± 0%   140.50 ± 0%  +242.68% (p=0.002 n=6)
Collection_Query_NoContent_5000-8     41.00 ± 0%   172.00 ± 1%  +319.51% (p=0.002 n=6)
Collection_Query_NoContent_25000-8    41.00 ± 0%   204.00 ± 1%  +397.56% (p=0.002 n=6)
Collection_Query_NoContent_100000-8   41.00 ± 0%   232.00 ± 3%  +465.85% (p=0.002 n=6)
Collection_Query_100-8                41.00 ± 0%    94.50 ± 1%  +130.49% (p=0.002 n=6)
Collection_Query_1000-8               41.00 ± 0%   141.00 ± 1%  +243.90% (p=0.002 n=6)
Collection_Query_5000-8               41.00 ± 0%   174.50 ± 2%  +325.61% (p=0.002 n=6)
Collection_Query_25000-8              41.00 ± 0%   205.50 ± 2%  +401.22% (p=0.002 n=6)
Collection_Query_100000-8             41.50 ± 1%   233.00 ± 1%  +461.45% (p=0.002 n=6)
geomean                               41.05         161.4       +293.09%
@philippgille
Copy link
Owner Author

ℹ️ Note: just using the max heap without limiting it to n elements performs much worse.

Diff:

diff --git a/query.go b/query.go
index 240060c..2d6187c 100644
--- a/query.go
+++ b/query.go
@@ -1,12 +1,10 @@
 package chromem
 
 import (
-	"cmp"
 	"container/heap"
 	"context"
 	"fmt"
 	"runtime"
-	"slices"
 	"strings"
 	"sync"
 )
@@ -23,7 +21,7 @@ type docSim struct {
 type docMaxHeap []docSim
 
 func (h docMaxHeap) Len() int           { return len(h) }
-func (h docMaxHeap) Less(i, j int) bool { return h[i].similarity < h[j].similarity }
+func (h docMaxHeap) Less(i, j int) bool { return h[i].similarity > h[j].similarity }
 func (h docMaxHeap) Swap(i, j int)      { h[i], h[j] = h[j], h[i] }
 
 func (h *docMaxHeap) Push(x any) {
@@ -40,48 +38,6 @@ func (h *docMaxHeap) Pop() any {
 	return x
 }
 
-// maxDocSims manages a max-heap of docSims with a fixed size, keeping the n highest
-// similarities. It's safe for concurrent use, but not the result of values().
-// In our benchmarks this was faster than sorting a slice of docSims at the end.
-type maxDocSims struct {
-	h    docMaxHeap
-	lock sync.RWMutex
-	size int
-}
-
-// newMaxDocSims creates a new nMaxDocs with a fixed size.
-func newMaxDocSims(size int) *maxDocSims {
-	return &maxDocSims{
-		h:    make(docMaxHeap, 0, size),
-		size: size,
-	}
-}
-
-// add inserts a new docSim into the heap, keeping only the top n similarities.
-func (mds *maxDocSims) add(doc docSim) {
-	mds.lock.Lock()
-	defer mds.lock.Unlock()
-	if mds.h.Len() < mds.size {
-		heap.Push(&mds.h, doc)
-	} else if mds.h.Len() > 0 && mds.h[0].similarity < doc.similarity {
-		// Replace the smallest similarity if the new doc's similarity is higher
-		heap.Pop(&mds.h)
-		heap.Push(&mds.h, doc)
-	}
-}
-
-// values returns the docSims in the heap, sorted by similarity (descending).
-// The call itself is safe for concurrent use with add(), but the result isn't.
-// Only work with the result after all calls to add() have finished.
-func (d *maxDocSims) values() []docSim {
-	d.lock.RLock()
-	defer d.lock.RUnlock()
-	slices.SortFunc(d.h, func(i, j docSim) int {
-		return cmp.Compare(j.similarity, i.similarity)
-	})
-	return d.h
-}
-
 // filterDocs filters a map of documents by metadata and content.
 // It does this concurrently.
 func filterDocs(docs map[string]*Document, where, whereDocument map[string]string) []*Document {
@@ -163,7 +119,8 @@ func documentMatchesFilters(document *Document, where, whereDocument map[string]
 }
 
 func getMostSimilarDocs(ctx context.Context, queryVectors []float32, docs []*Document, n int) ([]docSim, error) {
-	nMaxDocs := newMaxDocSims(n)
+	sortedDocs := &docMaxHeap{}
+	sortedDocksLock := sync.Mutex{}
 
 	// Determine concurrency. Use number of docs or CPUs, whichever is smaller.
 	numCPUs := runtime.NumCPU()
@@ -218,7 +175,10 @@ func getMostSimilarDocs(ctx context.Context, queryVectors []float32, docs []*Doc
 					return
 				}
 
-				nMaxDocs.add(docSim{docID: doc.ID, similarity: sim})
+				sortedDocksLock.Lock()
+				// We don't defer the unlock because we want to do it much earlier
+				heap.Push(sortedDocs, docSim{docID: doc.ID, similarity: sim})
+				sortedDocksLock.Unlock()
 			}
 		}(docs[start:end])
 	}
@@ -229,5 +189,11 @@ func getMostSimilarDocs(ctx context.Context, queryVectors []float32, docs []*Doc
 		return nil, sharedErr
 	}
 
-	return nMaxDocs.values(), nil
+	// Pop the n most similar documents from the max-heap.
+	res := make([]docSim, n)
+	for i := 0; i < n; i++ {
+		res[i] = heap.Pop(sortedDocs).(docSim)
+	}
+
+	return res, nil
 }

Benchmark:

goos: linux
goarch: amd64
pkg: github.com/philippgille/chromem-go
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
BenchmarkCollection_Query_NoContent_100-8      	   12337	     95199 ns/op	   14103 B/op	     159 allocs/op
BenchmarkCollection_Query_NoContent_1000-8     	    1550	    772920 ns/op	   94846 B/op	    1062 allocs/op
BenchmarkCollection_Query_NoContent_5000-8     	     297	   3638077 ns/op	  674183 B/op	    5067 allocs/op
BenchmarkCollection_Query_NoContent_25000-8    	      54	  18671256 ns/op	 3775638 B/op	   25073 allocs/op
BenchmarkCollection_Query_NoContent_100000-8   	      13	  77886654 ns/op	16176052 B/op	  100079 allocs/op
BenchmarkCollection_Query_100-8                	   12507	     95767 ns/op	   14103 B/op	     159 allocs/op
BenchmarkCollection_Query_1000-8               	    1538	    806681 ns/op	   94847 B/op	    1062 allocs/op
BenchmarkCollection_Query_5000-8               	     303	   3662712 ns/op	  674161 B/op	    5067 allocs/op
BenchmarkCollection_Query_25000-8              	      60	  17388024 ns/op	 3775648 B/op	   25073 allocs/op
BenchmarkCollection_Query_100000-8             	      16	  68538502 ns/op	16176056 B/op	  100079 allocs/op
PASS
ok  	github.com/philippgille/chromem-go	33.631s
@philippgille philippgille merged commit 7b3595b into main Mar 17, 2024
@philippgille philippgille deleted the use-max-heap branch March 17, 2024 15:22
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

2 participants