Skip to content

Commit 3d2ff94

Browse files
Merge pull request #134 from JohannesKaufmann/improve-hard-line-break-2
improve-hard-line-break-2
2 parents e128d43 + f6aab8f commit 3d2ff94

17 files changed

+208
-127
lines changed

‎internal/textutils/consecutive_newlines.go‎

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,19 @@
11
package textutils
22

33
import (
4+
"bytes"
45
"unicode/utf8"
56
)
67

8+
// TrimUnnecessaryHardLineBreaks removes trailing spaces at the end of a line
// when that line is directly followed by a blank line (a line that is empty
// or consists only of spaces). The blank line already separates the two
// blocks, so the trailing spaces (a Markdown hard line break) carry no
// meaning there. The spaces on the blank line itself are removed as well.
//
// What is left untouched:
//   - Trailing spaces on a line that is followed by content, since those
//     are a real hard line break.
//   - A blank line containing spaces when the previous line has no trailing
//     spaces.
//   - Spaces *after* the blank line, since they may be the indentation of
//     a nested list.
//
// Any number of trailing spaces is handled, not only specific run lengths.
// It returns a new slice; content itself is not modified.
func TrimUnnecessaryHardLineBreaks(content []byte) []byte {
	if len(content) == 0 {
		return content
	}

	output := make([]byte, 0, len(content))
	rest := content
	for len(rest) > 0 {
		newlineIndex := bytes.IndexByte(rest, '\n')
		if newlineIndex < 0 {
			// The remainder is not terminated by a newline,
			// so there is no line break that could be trimmed.
			output = append(output, rest...)
			break
		}

		line, next := rest[:newlineIndex], rest[newlineIndex+1:]

		// Only lines that actually end in spaces are candidates.
		if trimmed := bytes.TrimRight(line, " "); len(trimmed) < len(line) {
			// Skip the spaces of the following line. If we then hit a newline
			// the following line is blank and the trailing spaces are unnecessary.
			afterSpaces := bytes.TrimLeft(next, " ")
			if len(afterSpaces) > 0 && afterSpaces[0] == '\n' {
				output = append(output, trimmed...)
				output = append(output, '\n', '\n')
				rest = afterSpaces[1:]
				continue
			}
		}

		output = append(output, line...)
		output = append(output, '\n')
		rest = next
	}

	return output
}
16+
717
func TrimConsecutiveNewlines(input []byte) []byte {
818
var result []byte
919
newlineCount := 0

‎internal/textutils/consecutive_newlines_test.go‎

Lines changed: 52 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -23,29 +23,32 @@ func TestTrimConsecutiveNewlines(t *testing.T) {
2323

2424
// Double newline cases
2525
{"double newline", "a\n\nb", "a\n\nb"},
26-
{"double newline with spaces", "a \n\nb", "a \n\nb"},
26+
{"double newline with spaces", "a \n\nb", "a\n\nb"},
2727
{"spaces between newlines", "a\n \nb", "a\n \nb"},
28+
// Note: It should not change the spaces *after* the newlines since this could impact lists
2829
{"spaces after double newline", "a\n\n b", "a\n\n b"},
2930

3031
// Triple+ newline cases
3132
{"triple newline", "a\n\n\nb", "a\n\nb"},
3233
{"quad newline", "a\n\n\n\nb", "a\n\nb"},
33-
{"triple newline with spaces", "a \n\n\nb", "a \n\nb"},
34+
{"triple newline with spaces", "a \n\n\nb", "a\n\nb"},
3435

3536
// Multiple segment cases
3637
{"multiple segments", "a\n\nb\n\nc", "a\n\nb\n\nc"},
37-
{"multiple segments with spaces", "a \n\nb \n\nc", "a \n\nb \n\nc"},
38+
{"multiple segments with spaces", "a \n\nb \n\nc", "a\n\nb\n\nc"},
3839

3940
// Spaces at end of line
4041
{"hard-line-break followed by text", "a \nb", "a \nb"},
41-
{"hard-line-break followed by newline", "a \n\nb", "a \n\nb"},
42+
{"hard-line-break followed by newline", "a \n\nb", "a\n\nb"},
4243

4344
// Edge cases
4445
{"only newlines", "\n\n\n", "\n\n"},
4546
{"only spaces", " ", " "},
4647

4748
{"leading and trailing newlines", "\n\n\ntext\n\n\n", "\n\ntext\n\n"},
48-
{"newlines and spaces", " \n \n \n \n ", " \n \n "},
49+
{"newlines and spaces 1", " \n \n \n \n ", "\n\n "},
50+
{"newlines and spaces 2", "a \n \nb", "a\n\nb"},
51+
{"newlines and spaces 3", "a \n \nb", "a\n\nb"},
4952

5053
{"leading spaces", " a", " a"},
5154
{"leading newline 1", "\na", "\na"},
@@ -60,14 +63,20 @@ func TestTrimConsecutiveNewlines(t *testing.T) {
6063
// UTF-8 cases
6164
{"german special chars", "äöü\n\n\näöü", "äöü\n\näöü"},
6265
{"utf8 chars", "🌟\n\n\n🌟\n\n\n🌟", "🌟\n\n🌟\n\n🌟"},
66+
67+
// Markdown
68+
// Note: The sublist needs to be indented by " -"
69+
{"indented sublist", "- The main list\n \n - The sublist", "- The main list\n \n - The sublist"},
6370
}
6471

6572
for _, tt := range tests {
6673
t.Run(tt.name, func(t *testing.T) {
67-
got := string(TrimConsecutiveNewlines([]byte(tt.input)))
68-
if got != tt.expected {
74+
output := TrimConsecutiveNewlines([]byte(tt.input))
75+
output = TrimUnnecessaryHardLineBreaks(output)
76+
77+
if string(output) != tt.expected {
6978
t.Errorf("\ninput: %q\nexpected: %q\ngot: %q",
70-
tt.input, tt.expected, got,
79+
tt.input, tt.expected, string(output),
7180
)
7281
}
7382
})
@@ -77,49 +86,59 @@ func TestTrimConsecutiveNewlines(t *testing.T) {
7786
// TestTrimConsecutiveNewlines_Allocs pins the average number of allocations
// reported by testing.AllocsPerRun (over N runs) for TrimConsecutiveNewlines
// on four inputs: no newlines, exactly two newlines, three newlines and many
// newlines. Each case runs as its own subtest and compares the measured
// average against a hard-coded expected value.
// NOTE(review): the input slice is built inside the measured closure, so its
// own allocation (e.g. from bytes.Repeat) is presumably part of the count —
// confirm before changing the expected values.
func TestTrimConsecutiveNewlines_Allocs(t *testing.T) {
7887
const N = 1000
7988

80-
var avg float64
81-
/*
82-
avg = testing.AllocsPerRun(N, func() {
89+
t.Run("no newlines", func(t *testing.T) {
90+
var expectedAverage float64 = 1
91+
92+
actualAverage := testing.AllocsPerRun(N, func() {
8393
input := []byte("abc")
8494
output := TrimConsecutiveNewlines(input)
8595
_ = output
8696
})
87-
if avg != 0 {
88-
t.Errorf("with no newlines there should be no allocations but got %f", avg)
97+
if actualAverage != expectedAverage {
98+
t.Errorf("expected %f allocations but got %f", expectedAverage, actualAverage)
8999
}
100+
})
101+
t.Run("exactly two newlines", func(t *testing.T) {
102+
var expectedAverage float64 = 1
90103

91-
avg = testing.AllocsPerRun(N, func() {
104+
actualAverage := testing.AllocsPerRun(N, func() {
92105
input := []byte("abc\n\nabc")
93106
output := TrimConsecutiveNewlines(input)
94107
_ = output
95108
})
96-
if avg != 0 {
97-
t.Errorf("with only two newlines there should be no allocations but got %f", avg)
109+
if actualAverage != expectedAverage {
110+
t.Errorf("expected %f allocations but got %f", expectedAverage, actualAverage)
98111
}
99-
*/
112+
})
113+
t.Run("three newlines", func(t *testing.T) {
114+
var expectedAverage float64 = 1
100115

101-
avg = testing.AllocsPerRun(N, func() {
102-
input := []byte("abc\n\n\nabc")
103-
output := TrimConsecutiveNewlines(input)
104-
_ = output
116+
actualAverage := testing.AllocsPerRun(N, func() {
117+
input := []byte("abc\n\n\nabc")
118+
output := TrimConsecutiveNewlines(input)
119+
_ = output
120+
})
121+
if actualAverage != expectedAverage {
122+
t.Errorf("expected %f allocations but got %f", expectedAverage, actualAverage)
123+
}
105124
})
106-
if avg != 1 {
107-
t.Errorf("with three newlines there should be 1 allocation but got %f", avg)
108-
}
125+
t.Run("many newlines", func(t *testing.T) {
126+
var expectedAverage float64 = 16
109127

110-
avg = testing.AllocsPerRun(N, func() {
111-
input := []byte("abc\n\n\n\n\n\nabc\n\n\n\n\n\nabc\n\n\n\n\n\nabc\n\n\n\n\n\nabc\n\n\n\n\n\nabc")
112-
output := TrimConsecutiveNewlines(input)
113-
_ = output
128+
actualAverage := testing.AllocsPerRun(N, func() {
129+
input := bytes.Repeat([]byte("abc\n\n\n\n\n\nabc"), 1000)
130+
output := TrimConsecutiveNewlines(input)
131+
_ = output
132+
})
133+
if actualAverage != expectedAverage {
134+
t.Errorf("expected %f allocations but got %f", expectedAverage, actualAverage)
135+
}
114136
})
115-
if avg != 3 {
116-
t.Errorf("with many newlines there should be 3 allocation but got %f", avg)
117-
}
118137
}
119138

120-
const Repeat = 10
121-
122139
func BenchmarkTrimConsecutiveNewlines(b *testing.B) {
140+
const Repeat = 10
141+
123142
runs := []struct {
124143
desc string
125144
input []byte
Lines changed: 43 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,56 @@
11
package textutils
22

3-
// EscapeMultiLine deals with multiline content inside a link or a heading.
4-
func EscapeMultiLine(content []byte) []byte {
5-
content = TrimConsecutiveNewlines(content)
3+
import (
4+
"bytes"
5+
"unicode"
6+
)
7+
8+
var (
	// doubleSpace is the trailing whitespace that already marks a hard line break.
	doubleSpace = []byte{' ', ' '}

	// newlineBreak is the separator the content is split on.
	newlineBreak = []byte{'\n'}
	// hardLineBreak is appended to lines that have content.
	hardLineBreak = []byte{' ', ' ', '\n'}
	// escapedNoContentLineBreak replaces a line that has no content.
	escapedNoContentLineBreak = []byte{'\\', '\n'}
)

// EscapeMultiLine deals with multiline content inside a link or a heading.
//
// The content is split at every "\n" and each line is handled on its own:
//   - Leading whitespace of every line is removed.
//   - A blank line would interrupt the link, so it is replaced with an
//     escaped line break ("\" followed by "\n").
//   - Every other line (except the last one) ends in a hard line break,
//     meaning two spaces followed by "\n". If the line already ends in two
//     spaces only the "\n" is added.
//   - The last line never gets a line break. If the last line is blank
//     (e.g. the content ends with "\n") nothing is emitted for it, so the
//     output contains exactly as many "\n" as the input.
//
// Content without any "\n" is returned unchanged. It is expected that
// consecutive newlines were already collapsed by the caller.
func EscapeMultiLine(content []byte) []byte {
	parts := bytes.Split(content, newlineBreak)
	if len(parts) == 1 {
		return content
	}

	last := len(parts) - 1

	// Every line except the last one can grow by the two spaces of the hard line break.
	output := make([]byte, 0, len(content)+len(doubleSpace)*last)

	for i, part := range parts {
		line := bytes.TrimLeftFunc(part, unicode.IsSpace)

		switch {
		case i == last:
			// For the last line we don't need to add any "\n" anymore.
			// This is checked first so that a blank last line does not
			// introduce an additional (escaped) line break.
			output = append(output, line...)
		case len(line) == 0:
			// A blank line would interrupt the link.
			// So we need to escape the line
			output = append(output, escapedNoContentLineBreak...)
		case bytes.HasSuffix(line, doubleSpace):
			// We already have "  " so adding a "\n" is enough
			output = append(output, line...)
			output = append(output, newlineBreak...)
		default:
			// We *prefer* having a hard-line-break "  \n"
			output = append(output, line...)
			output = append(output, hardLineBreak...)
		}
	}

	return output
}

‎internal/textutils/escape_multiline_test.go‎

Lines changed: 15 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,10 @@
11
package textutils
22

33
import (
4-
"bytes"
54
"strings"
65
"testing"
76
)
87

9-
var newline = []byte{'\n'}
10-
var escape = []byte{'\\'}
11-
12-
// EscapeMultiLine_Old is the earlier reference implementation of the
// multiline escaping (shown as removed in this diff).
//
// It trims whitespace around the whole content, passes it through
// TrimConsecutiveNewlines and returns early if nothing is left. Otherwise the
// content is split at every newline, each line is trimmed on both sides, a
// line that is empty after trimming is replaced with the escape character,
// and the lines are joined with a newline again.
func EscapeMultiLine_Old(content []byte) []byte {
13-
content = bytes.TrimSpace(content)
14-
content = TrimConsecutiveNewlines(content)
15-
if len(content) == 0 {
16-
return content
17-
}
18-
19-
parts := bytes.Split(content, newline)
20-
for i := range parts {
21-
parts[i] = bytes.TrimSpace(parts[i])
22-
if len(parts[i]) == 0 {
23-
parts[i] = escape
24-
}
25-
}
26-
content = bytes.Join(parts, newline)
27-
28-
return content
29-
}
30-
318
func TestEscapeMultiLine(t *testing.T) {
329
var tests = []struct {
3310
Name string
@@ -48,19 +25,19 @@ func TestEscapeMultiLine(t *testing.T) {
4825
{
4926
Name: "one newline",
5027
Text: "A\nB",
51-
Expected: "A\nB",
28+
Expected: "A \nB",
5229
},
5330
{
5431
Name: "two newlines",
5532
Text: "A\n\nB",
56-
Expected: "A\n\\\nB",
33+
Expected: "A \n\\\nB",
5734
},
5835
{
5936

6037
Name: "many newlines",
6138
// Will be at most two newline characters
6239
Text: "line 1\n\n\n\nline 2",
63-
Expected: "line 1\n\\\nline 2",
40+
Expected: "line 1 \n\\\nline 2",
6441
},
6542

6643
{
@@ -74,64 +51,48 @@ line3
7451
7552
7653
line4`,
77-
Expected: `line1
78-
line2
54+
Expected: `line1
55+
line2
7956
\
80-
line3
57+
line3
8158
\
8259
line4`,
8360
},
8461

8562
{
8663
Name: "empty line with a space",
8764
Text: "line 1\n \nline 2",
88-
Expected: "line 1\n\\\nline 2",
65+
Expected: "line 1 \n\\\nline 2",
8966
},
9067

9168
{
9269
Name: "content has a space",
9370
Text: "a\n\n b",
94-
Expected: "a\n\\\nb",
71+
Expected: "a \n\\\nb",
9572
},
9673
{
9774
Name: "content is indented",
9875
Text: "line 1\n line 2\n\tline 3",
99-
Expected: "line 1\nline 2\nline 3",
76+
Expected: "line 1 \nline 2 \nline 3",
10077
},
10178

10279
// TODO: keep existing "\" characters?
10380
}
10481

10582
for _, test := range tests {
10683
t.Run(test.Name, func(t *testing.T) {
107-
t.Run("old", func(t *testing.T) {
108-
output := EscapeMultiLine_Old([]byte(test.Text))
109-
110-
if string(output) != test.Expected {
111-
t.Errorf("expected '%s' but got '%s'", test.Expected, string(output))
112-
}
113-
})
114-
t.Run("new", func(t *testing.T) {
115-
output := EscapeMultiLine([]byte(test.Text))
116-
117-
if string(output) != test.Expected {
118-
t.Errorf("expected '%s' but got '%s'", test.Expected, string(output))
119-
}
120-
})
84+
input := TrimConsecutiveNewlines([]byte(test.Text))
85+
output := EscapeMultiLine(input)
86+
87+
if string(output) != test.Expected {
88+
t.Errorf("expected '%s' but got '%s'", test.Expected, string(output))
89+
}
12190
})
12291

12392
}
12493
}
12594

12695
func BenchmarkEscapeMultiLine(b *testing.B) {
127-
128-
b.Run("old", func(b *testing.B) {
129-
input := []byte(strings.Repeat("line 1\n\n \nline 2", 100))
130-
131-
for i := 0; i < b.N; i++ {
132-
_ = EscapeMultiLine_Old(input)
133-
}
134-
})
13596
b.Run("new", func(b *testing.B) {
13697
input := []byte(strings.Repeat("line 1\n\n \nline 2", 100))
13798

‎plugin/base/base.go‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ func (b *base) postRenderTrimContent(ctx converter.Context, result []byte) []byt
125125

126126
// Remove too many newlines
127127
result = textutils.TrimConsecutiveNewlines(result)
128+
result = textutils.TrimUnnecessaryHardLineBreaks(result)
128129

129130
return result
130131
}

0 commit comments

Comments
 (0)