Skip to content

Commit 5fdcc09

Browse files
jmooring authored and bep committed
markup/goldmark: Sanitize TOC heading titles
Fixes #13401
1 parent f5245a7 commit 5fdcc09

File tree

4 files changed

+110
-42
lines changed

4 files changed

+110
-42
lines changed

‎go.mod‎

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ require (
123123
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.26.4 // indirect
124124
github.com/aws/aws-sdk-go-v2/service/sts v1.30.3 // indirect
125125
github.com/aws/smithy-go v1.22.2 // indirect
126+
github.com/aymerick/douceur v0.2.0 // indirect
126127
github.com/cncf/xds/go v0.0.0-20250326154945-ae57f3c0d45f // indirect
127128
github.com/cpuguy83/go-md2man/v2 v2.0.6 // indirect
128129
github.com/dlclark/regexp2 v1.11.5 // indirect
@@ -141,6 +142,7 @@ require (
141142
github.com/google/wire v0.6.0 // indirect
142143
github.com/googleapis/enterprise-certificate-proxy v0.3.6 // indirect
143144
github.com/googleapis/gax-go/v2 v2.14.2 // indirect
145+
github.com/gorilla/css v1.0.1 // indirect
144146
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
145147
github.com/inconshreveable/mousetrap v1.1.0 // indirect
146148
github.com/jmespath/go-jmespath v0.4.0 // indirect
@@ -151,6 +153,7 @@ require (
151153
github.com/mailru/easyjson v0.7.7 // indirect
152154
github.com/mattn/go-colorable v0.1.13 // indirect
153155
github.com/mattn/go-runewidth v0.0.16 // indirect
156+
github.com/microcosm-cc/bluemonday v1.0.27 // indirect
154157
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // indirect
155158
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 // indirect
156159
github.com/oasdiff/yaml v0.0.0-20250309154309-f31be36b4037 // indirect

‎go.sum‎

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,8 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.30.3 h1:ZsDKRLXGWHk8WdtyYMoGNO7bTudr
137137
github.com/aws/aws-sdk-go-v2/service/sts v1.30.3/go.mod h1:zwySh8fpFyXp9yOr/KVzxOl8SRqgf/IDw5aUt9UKFcQ=
138138
github.com/aws/smithy-go v1.22.2 h1:6D9hW43xKFrRx/tXXfAlIZc4JI+yQe6snnWcQyxSyLQ=
139139
github.com/aws/smithy-go v1.22.2/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg=
140+
github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
141+
github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
140142
github.com/bep/clocks v0.5.0 h1:hhvKVGLPQWRVsBP/UB7ErrHYIO42gINVbvqxvYTPVps=
141143
github.com/bep/clocks v0.5.0/go.mod h1:SUq3q+OOq41y2lRQqH5fsOoxN8GbxSiT6jvoVVLCVhU=
142144
github.com/bep/debounce v1.2.0 h1:wXds8Kq8qRfwAOpAxHrJDbCXgC5aHSzgQb/0gKsHQqo=
@@ -352,6 +354,8 @@ github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5m
352354
github.com/googleapis/gax-go/v2 v2.14.2 h1:eBLnkZ9635krYIPD+ag1USrOAI0Nr0QYF3+/3GqO0k0=
353355
github.com/googleapis/gax-go/v2 v2.14.2/go.mod h1:ON64QhlJkhVtSqp4v1uaK92VyZ2gmvDQsweuyLV+8+w=
354356
github.com/googleapis/google-cloud-go-testing v0.0.0-20200911160855-bcd43fbb19e8/go.mod h1:dvDLG8qkwmyD9a/MJJN3XJcT3xFxOKAvTZGvuZmac9g=
357+
github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8=
358+
github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0=
355359
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
356360
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
357361
github.com/hairyhenderson/go-codeowners v0.7.0 h1:s0W4wF8bdsBEjTWzwzSlsatSthWtTAF2xLgo4a4RwAo=
@@ -406,6 +410,8 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE
406410
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
407411
github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
408412
github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
413+
github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk=
414+
github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA=
409415
github.com/mitchellh/mapstructure v1.5.1-0.20231216201459-8508981c8b6c h1:cqn374mizHuIWj+OSJCajGr/phAmuMug9qIX3l9CflE=
410416
github.com/mitchellh/mapstructure v1.5.1-0.20231216201459-8508981c8b6c/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
411417
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9M+97sNutRR1RKhG96O6jWumTTnw=

‎markup/goldmark/toc.go‎

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,10 @@ package goldmark
1515

1616
import (
1717
"bytes"
18+
"regexp"
19+
"strings"
1820

21+
"github.com/microcosm-cc/bluemonday"
1922
strikethroughAst "github.com/yuin/goldmark/extension/ast"
2023

2124
emojiAst "github.com/yuin/goldmark-emoji/ast"
@@ -61,7 +64,7 @@ func (t *tocTransformer) Transform(n *ast.Document, reader text.Reader, pc parse
6164
s := ast.WalkStatus(ast.WalkContinue)
6265
if n.Kind() == ast.KindHeading {
6366
if inHeading && !entering {
64-
tocHeading.Title = headingText.String()
67+
tocHeading.Title = sanitizeTOCHeadingTitle(headingText.String())
6568
headingText.Reset()
6669
toc.AddAt(tocHeading, row, level-1)
6770
tocHeading = &tableofcontents.Heading{}
@@ -139,3 +142,40 @@ func (e *tocExtension) Extend(m goldmark.Markdown) {
139142
// This must run after the ID generation (priority 100).
140143
110)))
141144
}
145+
146+
// tocSanitizerPolicy is the package-level policy used to sanitize TOC heading
// titles. It is built once, when the package is initialized, by
// newTOCSanitizerPolicy.
var tocSanitizerPolicy = newTOCSanitizerPolicy()
147+
148+
// newTOCSanitizerPolicy returns a bluemonday policy for sanitizing TOC heading
149+
// titles against an allowlist of inline HTML elements and attributes,
150+
// specifically excluding anchor elements to prevent links within TOC heading
151+
// titles.
152+
func newTOCSanitizerPolicy() *bluemonday.Policy {
153+
p := bluemonday.NewPolicy()
154+
p.AllowElements(
155+
"abbr", "b", "bdi", "bdo", "br", "cite", "code", "data", "del", "dfn",
156+
"em", "i", "ins", "kbd", "mark", "q", "rp", "rt", "ruby", "s", "samp",
157+
"small", "span", "strong", "sub", "sup", "time", "u", "var", "wbr",
158+
)
159+
p.AllowStandardAttributes()
160+
p.AllowStyling()
161+
p.AllowImages()
162+
p.AllowAttrs("cite").OnElements("del", "ins", "q")
163+
p.AllowAttrs("datetime").OnElements("del", "ins", "time")
164+
p.AllowAttrs("value").OnElements("data")
165+
return p
166+
}
167+
168+
// whiteSpaceRe matches a run of one or more whitespace characters. It is
// compiled once at package scope.
var whiteSpaceRe = regexp.MustCompile(`\s+`)
169+
170+
// sanitizeTOCHeadingTitle sanitizes s for use as a TOC heading title.
171+
func sanitizeTOCHeadingTitle(s string) string {
172+
if strings.IndexByte(s, '<') == -1 {
173+
return s
174+
}
175+
176+
// Sanitize the string.
177+
ss := tocSanitizerPolicy.Sanitize(s)
178+
179+
// Remove extraneous whitespace.
180+
return whiteSpaceRe.ReplaceAllString(strings.TrimSpace(ss), " ")
181+
}

‎markup/goldmark/toc_integration_test.go‎

Lines changed: 60 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,11 @@ title: p6 (strikethrough)
9090
title: p7 (emoji)
9191
---
9292
## A :snake: emoji
93+
-- content/p8.md --
94+
---
95+
title: p8 (link)
96+
---
97+
## A [link](https://example.org)
9398
`
9499

95100
b := hugolib.Test(t, files)
@@ -111,36 +116,41 @@ title: p7 (emoji)
111116
</nav>`)
112117

113118
// markdown
114-
b.AssertFileContent("public/p2/index.html", `<nav id="TableOfContents">
115-
<li><a href="#">Some <em>emphasized</em> text</a></li>
116-
<li><a href="#">Some <code>inline</code> code</a></li>
117-
<li><a href="#">Something to escape A &lt; B &amp;&amp; C &gt; B</a></li>
118-
`)
119+
b.AssertFileContent("public/p2/index.html",
120+
`<li><a href="#">Some <em>emphasized</em> text</a></li>`,
121+
`<li><a href="#">Some <code>inline</code> code</a></li>`,
122+
`<li><a href="#">Something to escape A &lt; B &amp;&amp; C &gt; B</a></li>`,
123+
)
119124

120125
// image
121-
b.AssertFileContent("public/p3/index.html", `
122-
<li><a href="#">An image <img src="a.jpg" alt="kitten"></a></li>
123-
`)
126+
b.AssertFileContent("public/p3/index.html",
127+
`<li><a href="#">An image <img src="a.jpg" alt="kitten"></a></li>`,
128+
)
124129

125130
// raw html
126-
b.AssertFileContent("public/p4/index.html", `
127-
<li><a href="#">Some <!-- raw HTML omitted -->raw<!-- raw HTML omitted --> HTML</a></li>
128-
`)
131+
b.AssertFileContent("public/p4/index.html",
132+
`<li><a href="#">Some raw HTML</a></li>`,
133+
)
129134

130135
// typographer
131-
b.AssertFileContent("public/p5/index.html", `
132-
<li><a href="#">Some &quot;typographer&quot; markup</a></li>
133-
`)
136+
b.AssertFileContent("public/p5/index.html",
137+
`<li><a href="#">Some &quot;typographer&quot; markup</a></li>`,
138+
)
134139

135140
// strikethrough
136-
b.AssertFileContent("public/p6/index.html", `
137-
<li><a href="#">Some ~~deleted~~ text</a></li>
138-
`)
141+
b.AssertFileContent("public/p6/index.html",
142+
`<li><a href="#">Some ~~deleted~~ text</a></li>`,
143+
)
139144

140145
// emoji
141-
b.AssertFileContent("public/p7/index.html", `
142-
<li><a href="#">A :snake: emoji</a></li>
143-
`)
146+
b.AssertFileContent("public/p7/index.html",
147+
`<li><a href="#">A :snake: emoji</a></li>`,
148+
)
149+
150+
// link
151+
b.AssertFileContent("public/p8/index.html",
152+
`<li><a href="#">A link</a></li>`,
153+
)
144154
}
145155

146156
func TestTableOfContentsAdvanced(t *testing.T) {
@@ -214,6 +224,11 @@ title: p6 (strikethrough)
214224
title: p7 (emoji)
215225
---
216226
## A :snake: emoji
227+
-- content/p8.md --
228+
---
229+
title: p8 (link)
230+
---
231+
## A [link](https://example.org)
217232
`
218233

219234
b := hugolib.Test(t, files)
@@ -231,37 +246,41 @@ title: p7 (emoji)
231246
</nav>`)
232247

233248
// markdown
234-
b.AssertFileContent("public/p2/index.html", `<nav id="TableOfContents">
235-
<li><a href="#some-emphasized-text">Some <em>emphasized</em> text</a></li>
236-
<li><a href="#some-inline-code">Some <code>inline</code> code</a></li>
237-
<li><a href="#something-to-escape-a--b--c--b">Something to escape A &lt; B &amp;&amp; C &gt; B</a></li>
238-
`)
249+
b.AssertFileContent("public/p2/index.html",
250+
`<li><a href="#some-emphasized-text">Some <em>emphasized</em> text</a></li>`,
251+
`<li><a href="#some-inline-code">Some <code>inline</code> code</a></li>`,
252+
`<li><a href="#something-to-escape-a--b--c--b">Something to escape A &lt; B &amp;&amp; C &gt; B</a></li>`,
253+
)
239254

240255
// image
241-
b.AssertFileContent("public/p3/index.html", `
242-
<li><a href="#an-image-kitten">An image <img src="a.jpg" alt="kitten" /></a></li>
243-
`)
256+
b.AssertFileContent("public/p3/index.html",
257+
`<li><a href="#an-image-kitten">An image <img src="a.jpg" alt="kitten"/></a></li>`,
258+
)
244259

245260
// raw html
246-
b.AssertFileContent("public/p4/index.html", `
247-
<li><a href="#some-raw-html">Some <span>raw</span> HTML</a></li>
248-
`)
261+
b.AssertFileContent("public/p4/index.html",
262+
`<li><a href="#some-raw-html">Some <span>raw</span> HTML</a></li>`,
263+
)
249264

250265
// typographer
251-
b.AssertFileContent("public/p5/index.html", `
252-
<li><a href="#some-typographer-markup">Some &ldquo;typographer&rdquo; markup</a></li>
253-
`)
266+
b.AssertFileContent("public/p5/index.html",
267+
`<li><a href="#some-typographer-markup">Some &ldquo;typographer&rdquo; markup</a></li>`,
268+
)
254269

255270
// strikethrough
256-
b.AssertFileContent("public/p6/index.html", `
257-
<li><a href="#some-deleted-text">Some <del>deleted</del> text</a></li>
258-
`)
271+
b.AssertFileContent("public/p6/index.html",
272+
`<li><a href="#some-deleted-text">Some <del>deleted</del> text</a></li>`,
273+
)
259274

260275
// emoji
261-
262-
b.AssertFileContent("public/p7/index.html", `
263-
<li><a href="#a-snake-emoji">A &#x1f40d; emoji</a></li>
264-
`)
276+
b.AssertFileContent("public/p7/index.html",
277+
`<li><a href="#a-snake-emoji">A &#x1f40d; emoji</a></li>`,
278+
)
279+
280+
// link
281+
b.AssertFileContent("public/p8/index.html",
282+
`<li><a href="#a-link">A link</a></li>`,
283+
)
265284
}
266285

267286
func TestIssue13416(t *testing.T) {

0 commit comments

Comments
 (0)