Skip to content

Commit 35c5dc0

Browse files
committed
Add --ellipses support.
1 parent 98714a8 commit 35c5dc0

File tree

8 files changed

+197
-7
lines changed

8 files changed

+197
-7
lines changed

‎README.md‎

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,14 @@ quote marks around regular text.
151151

152152
This feature is enabled with the `--smartquotes` flag or the `--auto` convenience flag.
153153

154+
## Ellipsis Support
155+
156+
There is a similar feature for converting `...` to an ellipsis character `…` when it
157+
appears to be appropriate (i.e., not in code blocks and when adjacent to words or
158+
punctuation).
159+
160+
This feature is enabled with the `--ellipses` flag or the `--auto` convenience flag.
161+
154162
## Frontmatter Support
155163

156164
Because **YAML frontmatter** is common on Markdown files, any YAML frontmatter (content

‎src/flowmark/cli.py‎

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ class Options:
5555
semantic: bool
5656
cleanups: bool
5757
smartquotes: bool
58+
ellipses: bool
5859
inplace: bool
5960
nobackup: bool
6061
version: bool
@@ -119,6 +120,13 @@ def _parse_args(args: list[str] | None = None) -> Options:
119120
help="Convert straight quotes to typographic (curly) quotes and apostrophes "
120121
"(only applies to Markdown mode)",
121122
)
123+
parser.add_argument(
124+
"--ellipses",
125+
action="store_true",
126+
default=False,
127+
help="Convert three dots (...) to ellipsis character (…) with normalized spacing "
128+
"(only applies to Markdown mode)",
129+
)
122130
parser.add_argument(
123131
"-i", "--inplace", action="store_true", help="Edit the file in place (ignores --output)"
124132
)
@@ -130,7 +138,7 @@ def _parse_args(args: list[str] | None = None) -> Options:
130138
parser.add_argument(
131139
"--auto",
132140
action="store_true",
133-
help="Same as `--inplace --nobackup --semantic --cleanups --smartquotes`, as a convenience for "
141+
help="Same as `--inplace --nobackup --semantic --cleanups --smartquotes --ellipses`, as a convenience for "
134142
"fully auto-formatting files",
135143
)
136144
parser.add_argument(
@@ -146,6 +154,7 @@ def _parse_args(args: list[str] | None = None) -> Options:
146154
opts.semantic = True
147155
opts.cleanups = True
148156
opts.smartquotes = True
157+
opts.ellipses = True
149158

150159
return Options(
151160
file=opts.file,
@@ -155,6 +164,7 @@ def _parse_args(args: list[str] | None = None) -> Options:
155164
semantic=opts.semantic,
156165
cleanups=opts.cleanups,
157166
smartquotes=opts.smartquotes,
167+
ellipses=opts.ellipses,
158168
inplace=opts.inplace,
159169
nobackup=opts.nobackup,
160170
version=opts.version,
@@ -193,6 +203,7 @@ def main(args: list[str] | None = None) -> int:
193203
semantic=options.semantic,
194204
cleanups=options.cleanups,
195205
smartquotes=options.smartquotes,
206+
ellipses=options.ellipses,
196207
make_parents=True,
197208
)
198209
except ValueError as e:

‎src/flowmark/ellipses.py‎

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import re
2+
from re import Pattern
3+
4+
ELLIPSIS_PATTERN: Pattern[str] = re.compile(
5+
r"(^|[\w\"\'“‘])(\s*)(\.\.\.)([.,:;?!)\-—\"\'”’]?)(\s*)",
6+
re.MULTILINE,
7+
)
8+
9+
10+
def ellipses(text: str) -> str:
11+
r"""
12+
Replace three consecutive dots with a proper ellipsis character (…).
13+
14+
Rules:
15+
- `...` must be preceded by start of line OR a word character (with optional space)
16+
- `...` must be followed by word character (with optional space) OR punctuation OR end of line
17+
- If immediately before the `...` is a word character (no whitespace), a space is inserted before it.
18+
- If immediately after the `...` is a word character (no whitespace), a space is inserted after it.
19+
- If the punctuation [\"\'“‘] immediately precedes the ellipsis, there is no space between the
20+
punctuation and the ellipsis.
21+
- If punctuation [.,:;?!)\-—] follows the ellipsis, there is no space between the ellipsis and
22+
the punctuation.
23+
"""
24+
25+
def replace_match(match: re.Match[str]) -> str:
26+
prefix = match.group(1)
27+
spaces_before = match.group(2)
28+
punct = match.group(4)
29+
spaces_after = match.group(5)
30+
31+
# Get what follows the match
32+
end_pos = match.end()
33+
remaining = text[end_pos:] if end_pos < len(text) else ""
34+
next_char = remaining[0] if remaining else ""
35+
36+
# Check boundary - must be followed by word or end of line
37+
if remaining and not re.match(r"\w|$", next_char):
38+
return match.group(0)
39+
40+
result = prefix
41+
42+
# Add space before ellipsis if word char with no existing space
43+
if prefix and re.match(r"\w", prefix) and not spaces_before:
44+
result += " "
45+
else:
46+
result += spaces_before
47+
48+
result += "…" + punct
49+
50+
# Add space after ellipsis if word char follows with no space and no punct
51+
if next_char and re.match(r"\w", next_char) and not spaces_after and not punct:
52+
result += " "
53+
else:
54+
result += spaces_after
55+
56+
return result
57+
58+
return ELLIPSIS_PATTERN.sub(replace_match, text)

‎src/flowmark/markdown_filling.py‎

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from flowmark.custom_marko import custom_marko
1919
from flowmark.doc_cleanups import doc_cleanups
2020
from flowmark.doc_transforms import rewrite_text_content
21+
from flowmark.ellipses import ellipses as apply_ellipses
2122
from flowmark.frontmatter import split_frontmatter
2223
from flowmark.line_wrappers import LineWrapper, line_wrap_by_sentence, line_wrap_to_width
2324
from flowmark.sentence_split_regex import split_sentences_regex
@@ -82,6 +83,7 @@ def fill_markdown(
8283
semantic: bool = False,
8384
cleanups: bool = False,
8485
smartquotes: bool = False,
86+
ellipses: bool = False,
8587
line_wrapper: LineWrapper | None = None,
8688
) -> str:
8789
"""
@@ -130,6 +132,8 @@ def fill_markdown(
130132
doc_cleanups(document)
131133
if smartquotes:
132134
rewrite_text_content(document, smart_quotes, coalesce_lines=True)
135+
if ellipses:
136+
rewrite_text_content(document, apply_ellipses, coalesce_lines=True)
133137
result = marko.render(document)
134138

135139
# Reattach frontmatter if it was present

‎src/flowmark/reformat_api.py‎

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ def reformat_text(
1515
semantic: bool = True,
1616
cleanups: bool = True,
1717
smartquotes: bool = False,
18+
ellipses: bool = False,
1819
) -> str:
1920
"""
2021
Reformat text or markdown and wrap lines. Simply a convenient wrapper
@@ -31,7 +32,12 @@ def reformat_text(
3132
else:
3233
# Markdown mode
3334
result = fill_markdown(
34-
text, width=width, semantic=semantic, cleanups=cleanups, smartquotes=smartquotes
35+
text,
36+
width=width,
37+
semantic=semantic,
38+
cleanups=cleanups,
39+
smartquotes=smartquotes,
40+
ellipses=ellipses,
3541
)
3642

3743
return result
@@ -47,6 +53,7 @@ def reformat_file(
4753
semantic: bool = False,
4854
cleanups: bool = True,
4955
smartquotes: bool = False,
56+
ellipses: bool = False,
5057
make_parents: bool = True,
5158
) -> None:
5259
"""
@@ -66,6 +73,8 @@ def reformat_file(
6673
headers (only applies to Markdown mode).
6774
smartquotes: Convert straight quotes to typographic (curly) quotes and apostrophes
6875
(only applies to Markdown mode).
76+
ellipses: Convert three dots (...) to ellipsis character (…) with normalized spacing
77+
(only applies to Markdown mode).
6978
make_parents: Whether to make parent directories if they don't exist.
7079
"""
7180
read_stdin = path == "-"
@@ -79,7 +88,7 @@ def reformat_file(
7988
else:
8089
text = Path(path).read_text()
8190

82-
result = reformat_text(text, width, plaintext, semantic, cleanups, smartquotes)
91+
result = reformat_text(text, width, plaintext, semantic, cleanups, smartquotes, ellipses)
8392

8493
if inplace:
8594
backup_suffix = ".orig" if not nobackup else ""

‎tests/test_ellipses.py‎

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
from flowmark.ellipses import ellipses
2+
3+
4+
def test_ellipses():
5+
# Basic conversions, adding space only if needed next to a word character
6+
assert ellipses("word...") == "word …"
7+
assert ellipses("word ...") == "word …"
8+
assert ellipses("word ...") == "word …"
9+
assert ellipses("word ... ") == "word … "
10+
assert ellipses("word ... ") == "word … "
11+
12+
assert ellipses("word...word") == "word … word"
13+
assert ellipses("word ... word") == "word … word"
14+
assert ellipses("word ... word") == "word … word"
15+
assert ellipses("Hello...World") == "Hello … World"
16+
17+
assert ellipses("...word") == "… word"
18+
assert ellipses("... word") == "… word"
19+
assert ellipses(" ... word") == " … word"
20+
assert ellipses("word...") == "word …"
21+
assert ellipses("...") == "…"
22+
23+
assert ellipses("I think... well... maybe...") == "I think … well … maybe …"
24+
assert ellipses("First...second...third") == "First … second … third"
25+
assert ellipses("Wait... what... really?") == "Wait … what … really?"
26+
assert (
27+
ellipses("I was thinking... maybe we should go.") == "I was thinking … maybe we should go."
28+
)
29+
assert (
30+
ellipses("The options are... well... complicated.")
31+
== "The options are … well … complicated."
32+
)
33+
34+
# Punctuation cases.
35+
assert ellipses("word....") == "word …."
36+
assert ellipses("word.... ") == "word …. "
37+
assert ellipses("word.... text") == "word …. text"
38+
assert ellipses("word....word") == "word ….word"
39+
assert ellipses("He said...") == "He said …"
40+
assert ellipses("Really...?") == "Really …?"
41+
assert ellipses("Wait...!") == "Wait …!"
42+
assert ellipses("Well...,") == "Well …,"
43+
assert ellipses("word .... Another") == "word …. Another"
44+
45+
assert ellipses("word.....") == "word....."
46+
assert ellipses("word......") == "word......"
47+
48+
# Does not apply.
49+
assert ellipses("..") == ".." # Only two dots
50+
assert ellipses(".") == "." # Single dot
51+
assert ellipses(". . .") == ". . ." # Spaced dots
52+
assert ellipses("$...") == "$..." # Not preceded by word char or start
53+
assert ellipses("@...") == "@..." # Not preceded by word char or start
54+
assert ellipses("#...") == "#..." # Not preceded by word char or start
55+
assert ellipses("...@") == "...@" # Not followed by word char or end
56+
assert ellipses("...$") == "...$" # Not followed by word char or end ($ in pattern)
57+
assert ellipses("...#") == "...#" # Not followed by word char or end
58+
59+
# Multiline cases.
60+
assert (
61+
ellipses("First line...\nSecond line... continues\n...starts here")
62+
== "First line …\nSecond line … continues\n… starts here"
63+
)
64+
assert ellipses("Hello....\n") == "Hello ….\n"
65+
66+
# Edge cases.
67+
assert ellipses("") == ""
68+
assert ellipses("No ellipses here") == "No ellipses here"
69+
assert ellipses(" ") == " "
70+
assert ellipses("....") == "…."
71+
assert ellipses(" ....") == " …."
72+
73+
# Code-like cases
74+
assert ellipses("if (x...) {") == "if (x...) {"
75+
assert ellipses("[...]") == "[...]"
76+
assert ellipses("{...}") == "{...}"
77+
assert ellipses("path/to/...") == "path/to/..."
78+
79+
# Four dots followed by word gets space
80+
assert ellipses("word....word") == "word ….word"
81+
assert ellipses("word....123") == "word ….123"
82+
83+
# Quote cases
84+
assert ellipses("'...word") == "'… word"
85+
assert ellipses("'... word") == "'… word"
86+
assert ellipses('"...word') == '"… word'
87+
assert ellipses('"... word') == '"… word'
88+
assert ellipses("'...word'") == "'… word'"
89+
assert ellipses('"...word"') == '"… word"'
90+
assert ellipses("'...'") == "'…'"
91+
assert ellipses('"..."') == '"…"'
92+
assert ellipses("word...'") == "word …'"
93+
assert ellipses('word..."') == 'word …"'
94+
assert ellipses("word...'next") == "word …'next"
95+
assert ellipses('word..."next') == 'word …"next'
96+
assert ellipses("He said '...'") == "He said '…'"
97+
assert ellipses('She said "..."') == 'She said "…"'

‎tests/test_ref_docs.py‎

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class TestCase:
2626
semantic: bool
2727
cleanups: bool
2828
smartquotes: bool
29+
ellipses: bool = False
2930

3031
test_cases: list[TestCase] = [
3132
TestCase(
@@ -55,6 +56,7 @@ class TestCase:
5556
semantic=True,
5657
cleanups=True,
5758
smartquotes=True,
59+
ellipses=True,
5860
),
5961
]
6062

@@ -69,6 +71,7 @@ class TestCase:
6971
semantic=case.semantic,
7072
cleanups=case.cleanups,
7173
smartquotes=case.smartquotes,
74+
ellipses=case.ellipses,
7275
)
7376
if actual != expected:
7477
actual_path = testdoc_dir / f"testdoc.actual.{case.name}.md"

‎tests/testdocs/testdoc.expected.auto.md‎

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ high-stakes decisions for those who give and *receive it*. Blah blah blah and bl
6161

6262
## Some apostrophes and quotes--and dashes, too
6363

64-
“Hello,” he said! “What’s... your specialty?”
64+
“Hello,” he said! “What’s … your specialty?”
6565
he enquired (in an idle tone).
6666

6767
He read the multiline quote: “This is line one and this is line two and finally line
@@ -614,7 +614,7 @@ Linux offers a rich set of POSIX\-compliant and Linux\-specific mechanisms.
614614
tempfile.NamedTemporaryFile(mode='w', dir=dest\_dir, delete=False) (use
615615
appropriate mode, e.g., wb for binary).
616616

617-
+ Use a try...finally block to ensure cleanup:\
617+
+ Use a try … finally block to ensure cleanup:\
618618
Python\
619619
temp\_file = None\
620620
try:\
@@ -881,7 +881,7 @@ or em*phases* or **bold**face or *em*phases or bold**face** or em*phases* or
881881
**bold**face or *em*phases or bold**face** or em*phases* or **bold**face or *em*phases
882882
or bold**face** or em*phases* or **bold**face or *em*phases or bold**face** or
883883
em*phases* or **bold**face or *em*phases or bold**face** or em*phases* can flow on and
884-
on...
884+
on …
885885

886886
*blah blah* *blah blah* *blah blah* *blah blah* *blah blah* *blah blah* *blah blah*
887887
*blah blah* *blah blah* *blah blah* *blah blah* *blah blah* *blah blah* *blah blah*
@@ -1062,7 +1062,7 @@ And let’s try some links with angle brackets.
10621062
the value proposition of a company and ensuring the alignment of interests between
10631063
the two parties.
10641064

1065-
[^multiparagraph]: This is an even longer footnote...
1065+
[^multiparagraph]: This is an even longer footnote …
10661066

10671067
Paragraph 1.
10681068

0 commit comments

Comments
 (0)