Skip to content

Commit c5dca3b

Browse files
committed
Add transform.HTMLToMarkdown
Fixes #13946
1 parent ec463c0 commit c5dca3b

File tree

4 files changed

+99
-1
lines changed

4 files changed

+99
-1
lines changed

‎go.mod‎

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,8 @@ require (
105105
github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.29.0 // indirect
106106
github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.53.0 // indirect
107107
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.53.0 // indirect
108+
github.com/JohannesKaufmann/dom v0.2.0 // indirect
109+
github.com/JohannesKaufmann/html-to-markdown/v2 v2.4.0
108110
github.com/aws/aws-sdk-go v1.55.7 // indirect
109111
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.11 // indirect
110112
github.com/aws/aws-sdk-go-v2/config v1.29.17 // indirect
@@ -162,7 +164,7 @@ require (
162164
github.com/perimeterx/marshmallow v1.1.5 // indirect
163165
github.com/pkg/errors v0.9.1 // indirect
164166
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
165-
github.com/rivo/uniseg v0.2.0 // indirect
167+
github.com/rivo/uniseg v0.4.7 // indirect
166168
github.com/russross/blackfriday/v2 v2.1.0 // indirect
167169
github.com/spiffe/go-spiffe/v2 v2.5.0 // indirect
168170
github.com/woodsbury/decimal128 v1.3.0 // indirect

‎go.sum‎

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,10 @@ github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0
9191
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.53.0/go.mod h1:jUZ5LYlw40WMd07qxcQJD5M40aUxrfwqQX1g7zxYnrQ=
9292
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.53.0 h1:Ron4zCA/yk6U7WOBXhTJcDpsUBG9npumK6xw2auFltQ=
9393
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.53.0/go.mod h1:cSgYe11MCNYunTnRXrKiR/tHc0eoKjICUuWpNZoVCOo=
94+
github.com/JohannesKaufmann/dom v0.2.0 h1:1bragmEb19K8lHAqgFgqCpiPCFEZMTXzOIEjuxkUfLQ=
95+
github.com/JohannesKaufmann/dom v0.2.0/go.mod h1:57iSUl5RKric4bUkgos4zu6Xt5LMHUnw3TF1l5CbGZo=
96+
github.com/JohannesKaufmann/html-to-markdown/v2 v2.4.0 h1:C0/TerKdQX9Y9pbYi1EsLr5LDNANsqunyI/btpyfCg8=
97+
github.com/JohannesKaufmann/html-to-markdown/v2 v2.4.0/go.mod h1:OLaKh+giepO8j7teevrNwiy/fwf8LXgoc9g7rwaE1jk=
9498
github.com/alecthomas/assert/v2 v2.11.0 h1:2Q9r3ki8+JYXvGsDyBXwH3LcJ+WK5D0gc5E8vS6K3D0=
9599
github.com/alecthomas/assert/v2 v2.11.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
96100
github.com/alecthomas/chroma/v2 v2.20.0 h1:sfIHpxPyR07/Oylvmcai3X/exDlE8+FA820NTz+9sGw=
@@ -465,6 +469,8 @@ github.com/redis/go-redis/v9 v9.8.0 h1:q3nRvjrlge/6UD7eTu/DSg2uYiU2mCL0G/uzBWqhi
465469
github.com/redis/go-redis/v9 v9.8.0/go.mod h1:huWgSWd8mW6+m0VPhJjSSQ+d6Nh1VICQ6Q5lHuCH/Iw=
466470
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
467471
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
472+
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
473+
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
468474
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
469475
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
470476
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=

‎tpl/transform/transform.go‎

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@ import (
2727
"strings"
2828
"sync/atomic"
2929

30+
htmltomarkdown "github.com/JohannesKaufmann/html-to-markdown/v2/converter"
31+
"github.com/JohannesKaufmann/html-to-markdown/v2/plugin/base"
32+
"github.com/JohannesKaufmann/html-to-markdown/v2/plugin/commonmark"
33+
"github.com/JohannesKaufmann/html-to-markdown/v2/plugin/table"
34+
3035
bp "github.com/gohugoio/hugo/bufferpool"
3136

3237
"github.com/bep/goportabletext"
@@ -324,3 +329,32 @@ func (ns *Namespace) ToMath(ctx context.Context, args ...any) (template.HTML, er
324329
func (ns *Namespace) Reset() {
325330
ns.cacheUnmarshal.Clear()
326331
}
332+
333+
// HTMLToMarkdown converts the HTML string given as the first argument to Markdown.
// The conversion uses a converter configured with the base, commonmark and table plugins.
// Only args[0] is read; ctx and any further arguments are currently unused.
// It returns an error if no argument is given, if args[0] cannot be cast to a string,
// or if the conversion itself fails.
//
// This was added in Hugo v0.151.0 and should be considered experimental for now.
334+
// We need to test this out in the wild for a while before committing to this API,
335+
// and there will eventually be more options here.
336+
func (ns *Namespace) HTMLToMarkdown(ctx context.Context, args ...any) (string, error) {
337+
if len(args) < 1 {
338+
return "", errors.New("must provide at least one argument")
339+
}
340+
input, err := cast.ToStringE(args[0])
341+
if err != nil {
342+
return "", err
343+
}
344+
345+
plugins := []htmltomarkdown.Plugin{
346+
base.NewBasePlugin(),
347+
commonmark.NewCommonmarkPlugin(),
348+
table.NewTablePlugin(),
349+
}
350+
351+
conv := htmltomarkdown.NewConverter(
352+
htmltomarkdown.WithPlugins(plugins...),
353+
)
354+
355+
markdown, err := conv.ConvertString(input)
356+
if err != nil {
357+
return "", err
358+
}
359+
return markdown, nil
360+
}

‎tpl/transform/transform_integration_test.go‎

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,3 +535,59 @@ disableKinds = ['page','rss','section','sitemap','taxonomy','term']
535535
b, err = hugolib.TestE(t, f)
536536
b.Assert(err.Error(), qt.Contains, "invalid strict mode")
537537
}
538+
539+
// TestHTMLToMarkdown builds a site with a single page (content/p1.md) whose body is the
// Markdown sample below. The page is rendered to both the "html" and "markdown" outputs;
// the markdown layout pipes .Content through transform.HTMLToMarkdown. The test asserts
// that public/p1/index.md contains the original Markdown sample.
func TestHTMLToMarkdown(t *testing.T) {
540+
t.Parallel()
541+
542+
markdown := `
543+
# Heading
544+
545+
Some **bold** text.
546+
547+
A [link](https://example.com).
548+
549+
An image:
550+
551+
![alt text](https://example.com/image.jpg "Image Title")
552+
553+
A list:
554+
555+
- Item 1
556+
- Item 2
557+
- Item 2a
558+
- Item 2b
559+
560+
A table:
561+
562+
| Header 1 | Header 2 |
563+
|----------|----------|
564+
| Cell 1 | Cell 2 |
565+
| Cell 3 | Cell 4 |
566+
567+
A blockquote:
568+
569+
> This is a quote.
570+
`
571+
files := `
572+
-- hugo.toml --
573+
disableKinds = ['rss','section','sitemap','taxonomy','term']
574+
-- layouts/all.html --
575+
All html.
576+
-- layouts/all.markdown --
577+
{{ .Content | transform.HTMLToMarkdown | safeHTML }}
578+
-- content/p1.md --
579+
---
580+
title: p1
581+
outputs: ["html", "markdown"]
582+
---
583+
`
584+
585+
files += markdown
586+
587+
b := hugolib.Test(t, files)
588+
589+
b.AssertFileContent("public/p1/index.html", `All html.`)
590+
591+
// There are some white space differences between the input and the converted output, so we cannot do an exact match. NOTE(review): this presumably relies on AssertFileContent being lenient about white space — confirm.
592+
b.AssertFileContent("public/p1/index.md", markdown)
593+
}

0 commit comments

Comments
 (0)