Skip to content

Commit ee61aab

Browse files
committed
Make truncate work with Unicode
Add test cases for some edge cases and Japanese characters
1 parent a6caf55 commit ee61aab

File tree

4 files changed

+242
-185
lines changed

4 files changed

+242
-185
lines changed

‎tpl/template_func_truncate.go‎

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
// Copyright 2016 The Hugo Authors. All rights reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package tpl
15+
16+
import (
17+
"errors"
18+
"html"
19+
"html/template"
20+
"regexp"
21+
"unicode"
22+
"unicode/utf8"
23+
24+
"github.com/spf13/cast"
25+
)
26+
27+
var (
	// tagRE matches one HTML tag located at the very start of its input.
	//
	// Capture groups:
	//   1: "/" when the tag is a closing tag, e.g. </p>
	//   2: the tag name
	//   3: the "/" (with any whitespace before it) that directly follows
	//      the name of a self-closing tag, e.g. <br/> or <br />
	//
	// The (?s) flag lets "." match newlines so that a tag whose attributes
	// are wrapped over several lines is still matched as a whole, and any
	// whitespace character (not only a space) terminates the tag name.
	tagRE = regexp.MustCompile(`(?s)^<(/)?([^\s>]+?)(?:(\s*/)|\s.*?)?>`)

	// htmlSinglets is the set of HTML void elements, i.e. elements that
	// never have a closing tag.
	htmlSinglets = map[string]bool{
		"area": true, "base": true, "br": true,
		"col": true, "embed": true, "hr": true,
		"img": true, "input": true, "link": true,
		"meta": true, "param": true, "source": true,
		"track": true, "wbr": true,
	}
)
35+
36+
// openTag records a start tag encountered while scanning HTML text.
type openTag struct {
	// name is the tag name as it appears in the markup.
	name string
	// pos is the byte offset within the scanned text at which the tag begins.
	pos int
}
40+
41+
func truncate(a interface{}, options ...interface{}) (template.HTML, error) {
42+
length, err := cast.ToIntE(a)
43+
if err != nil {
44+
return "", err
45+
}
46+
var textParam interface{}
47+
var ellipsis template.HTML
48+
49+
switch len(options) {
50+
case 0:
51+
return "", errors.New("truncate requires a length and a string")
52+
case 1:
53+
textParam = options[0]
54+
ellipsis = " …"
55+
case 2:
56+
textParam = options[1]
57+
var ok bool
58+
if ellipsis, ok = options[0].(template.HTML); !ok {
59+
s, e := cast.ToStringE(options[0])
60+
if e != nil {
61+
return "", errors.New("ellipsis must be a string")
62+
}
63+
ellipsis = template.HTML(html.EscapeString(s))
64+
}
65+
default:
66+
return "", errors.New("too many arguments passed to truncate")
67+
}
68+
if err != nil {
69+
return "", errors.New("text to truncate must be a string")
70+
}
71+
text, err := cast.ToStringE(textParam)
72+
if err != nil {
73+
return "", errors.New("text must be a string")
74+
}
75+
76+
if html, ok := textParam.(template.HTML); ok {
77+
return truncateHTML(length, ellipsis, html)
78+
}
79+
80+
if utf8.RuneCountInString(text) <= length {
81+
return template.HTML(html.EscapeString(text)), nil
82+
}
83+
84+
var lastWordIndex, lastNonSpace, currentLen int
85+
for i, r := range text {
86+
currentLen++
87+
if unicode.IsSpace(r) {
88+
lastWordIndex = lastNonSpace
89+
} else if unicode.In(r, unicode.Han, unicode.Hangul, unicode.Hiragana, unicode.Katakana) {
90+
lastWordIndex = i
91+
} else {
92+
lastNonSpace = i + utf8.RuneLen(r)
93+
}
94+
if currentLen > length {
95+
if lastWordIndex == 0 {
96+
return template.HTML(html.EscapeString(text[0:i])) + ellipsis, nil
97+
}
98+
return template.HTML(html.EscapeString(text[0:lastWordIndex])) + ellipsis, nil
99+
}
100+
}
101+
102+
return template.HTML(html.EscapeString(text)), nil
103+
}
104+
105+
func truncateHTML(length int, ellipsis, text template.HTML) (template.HTML, error) {
106+
if utf8.RuneCountInString(string(text)) <= length {
107+
return text, nil
108+
}
109+
110+
openTags := []openTag{}
111+
112+
var lastWordIndex, lastNonSpace, currentLen, endTextPos, nextTag int
113+
for i, r := range text {
114+
if i < nextTag {
115+
continue
116+
}
117+
slice := string(text[i:])
118+
m := tagRE.FindStringSubmatchIndex(slice)
119+
if len(m) > 0 && m[0] == 0 {
120+
tagname := slice[m[4]:m[5]]
121+
if m[2] == -1 {
122+
openTags = append(openTags, openTag{name: tagname, pos: i})
123+
} else {
124+
// SGML: An end tag closes, back to the matching start tag,
125+
// all unclosed intervening start tags with omitted end tags
126+
for i, tag := range openTags {
127+
if tag.name == tagname {
128+
openTags = openTags[i:]
129+
break
130+
}
131+
}
132+
}
133+
nextTag = i + m[1]
134+
continue
135+
}
136+
137+
currentLen++
138+
if unicode.IsSpace(r) {
139+
lastWordIndex = lastNonSpace
140+
} else if unicode.In(r, unicode.Han, unicode.Hangul, unicode.Hiragana, unicode.Katakana) {
141+
lastWordIndex = i
142+
} else {
143+
lastNonSpace = i + utf8.RuneLen(r)
144+
}
145+
if currentLen > length {
146+
if lastWordIndex == 0 {
147+
endTextPos = i
148+
} else {
149+
endTextPos = lastWordIndex
150+
}
151+
out := text[0:endTextPos] + ellipsis
152+
for _, tag := range openTags {
153+
if tag.pos > endTextPos {
154+
break
155+
}
156+
out += ("</" + template.HTML(tag.name) + ">")
157+
}
158+
159+
return out, nil
160+
}
161+
}
162+
163+
return text, nil
164+
}

‎tpl/template_func_truncate_test.go‎

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
// Copyright 2016 The Hugo Authors. All rights reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package tpl
15+
16+
import (
17+
"html/template"
18+
"reflect"
19+
"testing"
20+
)
21+
22+
func TestTruncate(t *testing.T) {
23+
var err error
24+
cases := []struct {
25+
v1 interface{}
26+
v2 interface{}
27+
v3 interface{}
28+
want interface{}
29+
isErr bool
30+
}{
31+
{10, "I am a test sentence", nil, template.HTML("I am a …"), false},
32+
{10, "", "I am a test sentence", template.HTML("I am a"), false},
33+
{10, "", "a b c d e f g h i j k", template.HTML("a b c d e"), false},
34+
{12, "", "<b>Should be escaped</b>", template.HTML("&lt;b&gt;Should be"), false},
35+
{10, template.HTML(" <a href='#'>Read more</a>"), "I am a test sentence", template.HTML("I am a <a href='#'>Read more</a>"), false},
36+
{20, template.HTML("I have a <a href='/markdown'>Markdown link</a> inside."), nil, template.HTML("I have a <a href='/markdown'>Markdown …</a>"), false},
37+
{10, "IamanextremelylongwordthatjustgoesonandonandonjusttoannoyyoualmostasifIwaswritteninGermanActuallyIbettheresagermanwordforthis", nil, template.HTML("Iamanextre …"), false},
38+
{10, template.HTML("<p>IamanextremelylongwordthatjustgoesonandonandonjusttoannoyyoualmostasifIwaswritteninGermanActuallyIbettheresagermanwordforthis</p>"), nil, template.HTML("<p>Iamanextre …</p>"), false},
39+
{13, template.HTML("With <a href=\"/markdown\">Markdown</a> inside."), nil, template.HTML("With <a href=\"/markdown\">Markdown …</a>"), false},
40+
{14, "Hello中国 Good 好的", nil, template.HTML("Hello中国 Good 好 …"), false},
41+
{14, template.HTML("<p>Hello中国 Good 好的</p>"), nil, template.HTML("<p>Hello中国 Good 好 …</p>"), false},
42+
{10, nil, nil, template.HTML(""), true},
43+
{nil, nil, nil, template.HTML(""), true},
44+
}
45+
for i, c := range cases {
46+
var result template.HTML
47+
if c.v2 == nil {
48+
result, err = truncate(c.v1)
49+
} else if c.v3 == nil {
50+
result, err = truncate(c.v1, c.v2)
51+
} else {
52+
result, err = truncate(c.v1, c.v2, c.v3)
53+
}
54+
55+
if c.isErr {
56+
if err == nil {
57+
t.Errorf("[%d] Slice didn't return an expected error", i)
58+
}
59+
} else {
60+
if err != nil {
61+
t.Errorf("[%d] failed: %s", i, err)
62+
continue
63+
}
64+
if !reflect.DeepEqual(result, c.want) {
65+
t.Errorf("[%d] got '%s' but expected '%s'", i, result, c.want)
66+
}
67+
}
68+
}
69+
70+
// Too many arguments
71+
_, err = truncate(10, " ...", "I am a test sentence", "wrong")
72+
if err == nil {
73+
t.Errorf("Should have errored")
74+
}
75+
76+
}

‎tpl/template_funcs.go‎

Lines changed: 1 addition & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ import (
3838
"strings"
3939
"sync"
4040
"time"
41-
"unicode"
4241
"unicode/utf8"
4342

4443
"github.com/bep/inflect"
@@ -56,14 +55,7 @@ import (
5655
)
5756

5857
var (
59-
funcMap template.FuncMap
60-
tagRE = regexp.MustCompile(`(?s)<(/)?([^ ]+?)(?:(\s*/)| .*?)?>`)
61-
htmlRE = regexp.MustCompile(`(?s)<.*?>|((?:\w[-\w]*|&.*?;)+)`)
62-
htmlSinglets = map[string]bool{
63-
"br": true, "col": true, "link": true,
64-
"base": true, "img": true, "param": true,
65-
"area": true, "hr": true, "input": true,
66-
}
58+
funcMap template.FuncMap
6759
)
6860

6961
// eq returns the boolean truth of arg1 == arg2.
@@ -247,130 +239,6 @@ func slicestr(a interface{}, startEnd ...interface{}) (string, error) {
247239

248240
}
249241

250-
// truncate is the previous implementation that this commit removes from
// template_funcs.go. It converts a to an int length, takes either (text) or
// (ellipsis, text) from options, hands template.HTML text to truncateHTML and
// otherwise returns the HTML-escaped text cut at the last word boundary
// found, followed by the ellipsis.
func truncate(a interface{}, options ...interface{}) (template.HTML, error) {
251-
length, err := cast.ToIntE(a)
252-
if err != nil {
253-
return "", err
254-
}
255-
var textParam interface{}
256-
var ellipsis template.HTML
257-
258-
switch len(options) {
259-
case 0:
260-
return "", errors.New("truncate requires a length and a string")
261-
case 1:
262-
textParam = options[0]
263-
ellipsis = " …"
264-
case 2:
265-
textParam = options[1]
266-
var ok bool
267-
if ellipsis, ok = options[0].(template.HTML); !ok {
268-
s, e := cast.ToStringE(options[0])
269-
if e != nil {
270-
return "", errors.New("ellipsis must be a string")
271-
}
272-
ellipsis = template.HTML(html.EscapeString(s))
273-
}
274-
default:
275-
return "", errors.New("too many arguments passed to truncate")
276-
}
277-
// err was checked right after cast.ToIntE and has not been assigned since,
// so this branch is never taken.
if err != nil {
278-
return "", errors.New("text to truncate must be a string")
279-
}
280-
text, err := cast.ToStringE(textParam)
281-
if err != nil {
282-
return "", errors.New("text must be a string")
283-
}
284-
285-
if html, ok := textParam.(template.HTML); ok {
286-
return truncateHTML(length, ellipsis, html)
287-
}
288-
289-
// len counts bytes, so length is compared against the byte size of text
// rather than its number of characters.
if len(text) <= length {
290-
return template.HTML(html.EscapeString(text)), nil
291-
}
292-
293-
var lastWordIndex int
294-
var lastNonSpace int
295-
for i, r := range text {
296-
if unicode.IsSpace(r) {
297-
lastWordIndex = lastNonSpace
298-
} else {
299-
lastNonSpace = i
300-
}
301-
// i is the byte offset of r within text, not a character count.
if i >= length {
302-
return template.HTML(html.EscapeString(text[0:lastWordIndex+1])) + ellipsis, nil
303-
}
304-
}
305-
306-
return template.HTML(html.EscapeString(text)), nil
307-
}
308-
309-
// truncateHTML is the previous implementation that this commit removes from
// template_funcs.go. It walks text with htmlRE, adds the byte length of each
// matched word plus one to a running total, keeps a list of tags that are
// still open, and once the total reaches length returns the text up to the
// end of the last matched word followed by ellipsis and a closing tag for
// every open tag.
func truncateHTML(length int, ellipsis, text template.HTML) (template.HTML, error) {
310-
// len counts bytes and includes the markup.
if len(text) <= length {
311-
return text, nil
312-
}
313-
314-
var pos, endTextPos, currentLen int
315-
openTags := []string{}
316-
317-
for currentLen < length {
318-
slice := string(text[pos:])
319-
m := htmlRE.FindStringSubmatchIndex(slice)
320-
if len(m) == 0 {
321-
// Checked through whole string
322-
break
323-
}
324-
325-
pos += m[1]
326-
if len(m) == 4 && m[3]-m[2] > 0 {
327-
// It's an actual non-HTML word or char
328-
currentLen += (m[3] - m[2]) + 1 // 1 space between each word
329-
if currentLen >= length {
330-
endTextPos = pos
331-
}
332-
continue
333-
}
334-
335-
tag := tagRE.FindStringSubmatch(slice[m[0]:m[1]])
336-
if len(tag) == 0 || currentLen >= length {
337-
// Don't worry about non tags or tags after our truncate point
338-
continue
339-
}
340-
closingTag := tag[1]
341-
tagname := strings.ToLower(tag[2])
342-
selfClosing := tag[3]
343-
344-
_, singlet := htmlSinglets[tagname]
345-
if !singlet && selfClosing == "" {
346-
if closingTag == "" {
347-
// Add it to the start of the open tags list
348-
openTags = append([]string{tagname}, openTags...)
349-
} else {
350-
for i, tag := range openTags {
351-
if tag == tagname {
352-
// SGML: An end tag closes, back to the matching start tag,
353-
// all unclosed intervening start tags with omitted end tags
354-
openTags = openTags[i+1:]
355-
break
356-
}
357-
}
358-
}
359-
}
360-
}
361-
362-
if currentLen < length {
363-
return text, nil
364-
}
365-
366-
out := text[0:endTextPos]
367-
out += ellipsis
368-
// openTags holds the most recently opened tag first, so the tags are closed
// from the innermost outwards.
for _, tag := range openTags {
369-
out += ("</" + template.HTML(tag) + ">")
370-
}
371-
return out, nil
372-
}
373-
374242
// hasPrefix tests whether the input s begins with prefix.
375243
func hasPrefix(s, prefix interface{}) (bool, error) {
376244
ss, err := cast.ToStringE(s)

0 commit comments

Comments
 (0)