Skip to content

Commit d5eda13

Browse files
committed
Replace the MD5 hashing of images with xxHash
Note that we only use this for change detection. The previous implementation invoked `MD5FromReaderFast`, which created an MD5 hash from eight 64-byte chunks of the file, which is obviously very fast. The new implementation creates the hash from the entire file and ... seems to be even more effective:

```
name          old time/op    new time/op    delta
HashImage-10    9.45µs ±21%   10.89µs ± 1%     ~     (p=0.343 n=4+4)

name          old alloc/op   new alloc/op   delta
HashImage-10      144B ± 0%       8B ± 0%  -94.44%  (p=0.029 n=4+4)

name          old allocs/op  new allocs/op  delta
HashImage-10      4.00 ± 0%     1.00 ± 0%  -75.00%  (p=0.029 n=4+4)
```
1 parent 8b5d796 commit d5eda13

File tree

7 files changed

+180
-29
lines changed

7 files changed

+180
-29
lines changed

‎common/hashing/hashing.go‎

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
// Copyright 2024 The Hugo Authors. All rights reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
// Package hashing provides common hashing utilities.
15+
package hashing
16+
17+
import (
18+
"encoding/hex"
19+
"io"
20+
"sync"
21+
22+
"github.com/cespare/xxhash/v2"
23+
)
24+
25+
// XXHashFromReader calculates the xxHash for the given reader.
26+
func XXHashFromReader(r io.ReadSeeker) (uint64, int64, error) {
27+
h := getXxHashReadFrom()
28+
defer putXxHashReadFrom(h)
29+
30+
size, err := io.Copy(h, r)
31+
if err != nil {
32+
return 0, 0, err
33+
}
34+
return h.Sum64(), size, nil
35+
}
36+
37+
// XXHashFromString calculates the xxHash for the given string.
38+
func XXHashFromString(s string) (uint64, error) {
39+
h := xxhash.New()
40+
h.WriteString(s)
41+
return h.Sum64(), nil
42+
}
43+
44+
// XxHashFromStringHexEncoded calculates the xxHash for the given string
45+
// and returns the hash as a hex encoded string.
46+
func XxHashFromStringHexEncoded(f string) string {
47+
h := xxhash.New()
48+
h.WriteString(f)
49+
hash := h.Sum(nil)
50+
return hex.EncodeToString(hash)
51+
}
52+
53+
type xxhashReadFrom struct {
54+
buff []byte
55+
*xxhash.Digest
56+
}
57+
58+
func (x *xxhashReadFrom) ReadFrom(r io.Reader) (int64, error) {
59+
for {
60+
n, err := r.Read(x.buff)
61+
if n > 0 {
62+
x.Digest.Write(x.buff[:n])
63+
}
64+
if err != nil {
65+
if err == io.EOF {
66+
err = nil
67+
}
68+
return int64(n), err
69+
}
70+
}
71+
}
72+
73+
var xXhashReadFromPool = sync.Pool{
74+
New: func() any {
75+
return &xxhashReadFrom{Digest: xxhash.New(), buff: make([]byte, 48*1024)}
76+
},
77+
}
78+
79+
func getXxHashReadFrom() *xxhashReadFrom {
80+
return xXhashReadFromPool.Get().(*xxhashReadFrom)
81+
}
82+
83+
func putXxHashReadFrom(h *xxhashReadFrom) {
84+
h.Reset()
85+
xXhashReadFromPool.Put(h)
86+
}

‎common/hashing/hashing_test.go‎

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// Copyright 2024 The Hugo Authors. All rights reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package hashing
15+
16+
import (
17+
"strings"
18+
"testing"
19+
20+
"github.com/cespare/xxhash/v2"
21+
qt "github.com/frankban/quicktest"
22+
)
23+
24+
func TestXxHashFromReader(t *testing.T) {
25+
c := qt.New(t)
26+
s := "Hello World"
27+
r := strings.NewReader(s)
28+
got, size, err := XXHashFromReader(r)
29+
c.Assert(err, qt.IsNil)
30+
c.Assert(size, qt.Equals, int64(len(s)))
31+
c.Assert(got, qt.Equals, uint64(7148569436472236994))
32+
}
33+
34+
func TestXxHashFromString(t *testing.T) {
35+
c := qt.New(t)
36+
s := "Hello World"
37+
got, err := XXHashFromString(s)
38+
c.Assert(err, qt.IsNil)
39+
c.Assert(got, qt.Equals, uint64(7148569436472236994))
40+
}
41+
42+
func TestXxHashFromStringHexEncoded(t *testing.T) {
43+
c := qt.New(t)
44+
s := "The quick brown fox jumps over the lazy dog"
45+
got := XxHashFromStringHexEncoded(s)
46+
// Facit: https://asecuritysite.com/encryption/xxhash?val=The%20quick%20brown%20fox%20jumps%20over%20the%20lazy%20dog
47+
c.Assert(got, qt.Equals, "0b242d361fda71bc")
48+
}
49+
50+
func BenchmarkXXHashFromReader(b *testing.B) {
51+
r := strings.NewReader("Hello World")
52+
b.ResetTimer()
53+
for i := 0; i < b.N; i++ {
54+
XXHashFromReader(r)
55+
r.Seek(0, 0)
56+
}
57+
}
58+
59+
func BenchmarkXXHashFromString(b *testing.B) {
60+
s := "Hello World"
61+
b.ResetTimer()
62+
for i := 0; i < b.N; i++ {
63+
XXHashFromString(s)
64+
}
65+
}
66+
67+
func BenchmarkXXHashFromStringHexEncoded(b *testing.B) {
68+
s := "The quick brown fox jumps over the lazy dog"
69+
b.ResetTimer()
70+
for i := 0; i < b.N; i++ {
71+
XxHashFromStringHexEncoded(s)
72+
}
73+
}
74+
75+
func xxHashFromString(f string) uint64 {
76+
h := xxhash.New()
77+
h.WriteString(f)
78+
return h.Sum64()
79+
}

‎helpers/general.go‎

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,11 @@ import (
2727
"unicode"
2828
"unicode/utf8"
2929

30-
"github.com/cespare/xxhash/v2"
30+
bp "github.com/gohugoio/hugo/bufferpool"
31+
3132
"github.com/spf13/afero"
3233

3334
"github.com/jdkato/prose/transform"
34-
35-
bp "github.com/gohugoio/hugo/bufferpool"
3635
)
3736

3837
// FilePathSeparator as defined by os.Separator.
@@ -258,13 +257,7 @@ func SliceToLower(s []string) []string {
258257
return l
259258
}
260259

261-
// XxHashString takes a string and returns its xxHash hash.
262-
func XxHashString(f string) string {
263-
h := xxhash.New()
264-
h.WriteString(f)
265-
hash := h.Sum(nil)
266-
return hex.EncodeToString(hash)
267-
}
260+
// XXHashFromReader creates a xxHash hash from the given reader.
268261

269262
// MD5String takes a string and returns its MD5 hash.
270263
func MD5String(f string) string {

‎resources/image.go‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -493,7 +493,7 @@ func (i *imageResource) relTargetPathFromConfig(conf images.ImageConfig) interna
493493
}
494494

495495
h := i.hash()
496-
idStr := fmt.Sprintf("_hu%s_%d", h, i.size())
496+
idStr := fmt.Sprintf("_hu%d_%d", h, i.size())
497497

498498
// Do not change for no good reason.
499499
const md5Threshold = 100

‎resources/resource.go‎

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"github.com/gohugoio/hugo/identity"
2727
"github.com/gohugoio/hugo/resources/internal"
2828

29+
"github.com/gohugoio/hugo/common/hashing"
2930
"github.com/gohugoio/hugo/common/herrors"
3031
"github.com/gohugoio/hugo/common/paths"
3132

@@ -307,7 +308,7 @@ type fileInfo interface {
307308
}
308309

309310
type hashProvider interface {
310-
hash() string
311+
hash() uint64
311312
}
312313

313314
var _ resource.StaleInfo = (*StaleValue[any])(nil)
@@ -403,7 +404,7 @@ func (l *genericResource) size() int64 {
403404
return l.h.size
404405
}
405406

406-
func (l *genericResource) hash() string {
407+
func (l *genericResource) hash() uint64 {
407408
if err := l.h.init(l); err != nil {
408409
panic(err)
409410
}
@@ -628,15 +629,15 @@ type targetPather interface {
628629
}
629630

630631
type resourceHash struct {
631-
value string
632+
value uint64
632633
size int64
633634
initOnce sync.Once
634635
}
635636

636637
func (r *resourceHash) init(l hugio.ReadSeekCloserProvider) error {
637638
var initErr error
638639
r.initOnce.Do(func() {
639-
var hash string
640+
var hash uint64
640641
var size int64
641642
f, err := l.ReadSeekCloser()
642643
if err != nil {
@@ -656,6 +657,6 @@ func (r *resourceHash) init(l hugio.ReadSeekCloserProvider) error {
656657
return initErr
657658
}
658659

659-
func hashImage(r io.ReadSeeker) (string, int64, error) {
660-
return helpers.MD5FromReaderFast(r)
660+
func hashImage(r io.ReadSeeker) (uint64, int64, error) {
661+
return hashing.XXHashFromReader(r)
661662
}

‎tpl/hash/hash.go‎

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,9 @@ package hash
1616

1717
import (
1818
"context"
19-
"encoding/hex"
2019
"hash/fnv"
2120

22-
"github.com/cespare/xxhash/v2"
21+
"github.com/gohugoio/hugo/common/hashing"
2322
"github.com/gohugoio/hugo/deps"
2423
"github.com/gohugoio/hugo/tpl/internal"
2524
"github.com/spf13/cast"
@@ -51,14 +50,7 @@ func (ns *Namespace) XxHash(v any) (string, error) {
5150
return "", err
5251
}
5352

54-
hasher := xxhash.New()
55-
56-
_, err = hasher.WriteString(conv)
57-
if err != nil {
58-
return "", err
59-
}
60-
hash := hasher.Sum(nil)
61-
return hex.EncodeToString(hash), nil
53+
return hashing.XxHashFromStringHexEncoded(conv), nil
6254
}
6355

6456
const name = "hash"

‎tpl/tplimpl/template_ast_transformers.go‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,12 @@ import (
1818
"fmt"
1919
"strings"
2020

21-
"github.com/gohugoio/hugo/helpers"
2221
htmltemplate "github.com/gohugoio/hugo/tpl/internal/go_templates/htmltemplate"
2322
texttemplate "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate"
2423

2524
"github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate/parse"
2625

26+
"github.com/gohugoio/hugo/common/hashing"
2727
"github.com/gohugoio/hugo/common/maps"
2828
"github.com/gohugoio/hugo/tpl"
2929
"github.com/mitchellh/mapstructure"
@@ -254,7 +254,7 @@ func (c *templateContext) handleDefer(withNode *parse.WithNode) {
254254
c.err = errors.New("resources.PostProcess cannot be used in a deferred template")
255255
return
256256
}
257-
innerHash := helpers.XxHashString(s)
257+
innerHash := hashing.XxHashFromStringHexEncoded(s)
258258
deferredID := tpl.HugoDeferredTemplatePrefix + innerHash
259259

260260
c.deferNodes[deferredID] = inner

0 commit comments

Comments
 (0)