@@ -128,15 +128,11 @@ def __str__(self) -> str:
128128 visited .add (id (value ))
129129
130130 if isinstance (value , list ):
131- truncated_list = value [:list_max_len ] + (
132- ["…" ] if len (value ) > list_max_len else []
133- )
131+ truncated_list = value [:list_max_len ] + (["…" ] if len (value ) > list_max_len else [])
134132 return (
135133 "["
136134 + ", " .join (
137- abbrev_obj (
138- item , field_max_len , list_max_len , key_filter , value_filter , visited
139- )
135+ abbrev_obj (item , field_max_len , list_max_len , key_filter , value_filter , visited )
140136 for item in truncated_list
141137 )
142138 + "]"
@@ -152,11 +148,7 @@ def __str__(self) -> str:
152148 )
153149
154150 if isinstance (value , dict ):
155- return (
156- "{"
157- + _format_kvs (value .items (), field_max_len , key_filter , value_filter )
158- + "}"
159- )
151+ return "{" + _format_kvs (value .items (), field_max_len , key_filter , value_filter ) + "}"
160152
161153 if isinstance (value , Enum ):
162154 return value .name
@@ -168,10 +160,11 @@ def _trim_trailing_punctuation(text: str) -> str:
168160 return re .sub (r"[.,;:!?]+$" , "" , text )
169161
170162
171- def abbrev_on_words (text : str , max_len : int , indicator : str = "…" ) -> str :
163+ def abbrev_on_words (text : str , max_len : int = 64 , indicator : str = "…" ) -> str :
172164 """
173- Abbreviate text to a maximum length, breaking on whole words (unless the first word
174- is too long). For aesthetics, removes trailing punctuation from the last word.
165+ Abbreviate text to a maximum character length, breaking on whole words
166+ (unless the first word is too long). For aesthetics, removes trailing
167+ punctuation from the last word.
175168 """
176169 if len (text ) <= max_len :
177170 return text
@@ -180,21 +173,19 @@ def abbrev_on_words(text: str, max_len: int, indicator: str = "…") -> str:
180173 if words and max_len and len (words [0 ]) > max_len :
181174 return abbrev_str (words [0 ], max_len , indicator )
182175
183- while (
184- words
185- and len (_trim_trailing_punctuation (" " .join (words ))) + len (indicator ) > max_len
186- ):
176+ while words and len (_trim_trailing_punctuation (" " .join (words ))) + len (indicator ) > max_len :
187177 words .pop ()
188178
189179 return _trim_trailing_punctuation (" " .join (words )) + indicator
190180
191181
192182def abbrev_phrase_in_middle (
193- phrase : str , max_len : int , ellipsis = "…" , max_trailing_len : int = 0
183+ phrase : str , max_len : int = 64 , ellipsis = "…" , max_trailing_len : int = 0
194184) -> str :
195185 """
196- Abbreviate a phrase to a maximum length, preserving the first and last few words of
197- the phrase whenever possible. The ellipsis is inserted in the middle of the phrase.
186+ Abbreviate a phrase to a maximum character length, preserving the first and last
187+ few words of the phrase whenever possible. The ellipsis is inserted in the middle
188+ of the phrase.
198189 """
199190 if not max_trailing_len :
200191 max_trailing_len = min (int (max_len / 2 ), max (16 , int (max_len / 4 )))
@@ -214,10 +205,7 @@ def abbrev_phrase_in_middle(
214205 # Walk through the split words, and tally total number of chars as we go.
215206 for i in range (len (words )):
216207 words [i ] = abbrev_str (words [i ], max_len , ellipsis )
217- if (
218- prefix_tally + len (words [i ]) + len (ellipsis ) + max_trailing_len >= max_len
219- and i > 0
220- ):
208+ if prefix_tally + len (words [i ]) + len (ellipsis ) + max_trailing_len >= max_len and i > 0 :
221209 prefix_end_index = i
222210 break
223211 prefix_tally += len (words [i ]) + 1
@@ -377,3 +365,19 @@ def fmt_paras(*paras: str | None, sep: str = "\n\n") -> str:
377365 """
378366 filtered_paras = [para .strip () for para in paras if para is not None ]
379367 return sep .join (para for para in filtered_paras if para )
368+
369+
# Punctuation that `sanitize_title` keeps by default, in addition to whatever `\w`
# matches. The `/` appears twice; within `sanitize_title` that is harmless because
# the string is only used as the contents of a regex character class.
DEFAULT_PUNCTUATION = ",./:;'!?/@%&()+“”‘’…–—-"


def sanitize_title(text: str, allowed_chars: str = DEFAULT_PUNCTUATION) -> str:
    """
    Simple sanitization for arbitrary text to make it suitable for a title or filename.

    Every run of characters that is neither a word character (Unicode letters,
    digits, and the underscore) nor listed in `allowed_chars` is replaced by a
    single space, and the result is stripped of leading and trailing whitespace.
    All whitespace, including tabs and newlines, therefore becomes single spaces.

    By default this keeps the most common punctuation, letters, and numbers, but
    not Markdown chars like `*` or `[]`, code characters, etc. Underscores are
    always kept, because they count as word characters.

    Note the default `allowed_chars` keeps `/` and `:`, so callers that need a
    strict filename should pass a narrower `allowed_chars`.

    Args:
        text: The text to sanitize.
        allowed_chars: Extra characters to keep verbatim. They are escaped before
            being placed in the regex, so characters such as `-`, `^`, or `]` are
            treated literally.

    Returns:
        The sanitized text, possibly empty.
    """
    # `\w` already covers Unicode letters, digits, and the underscore, so a
    # separate `\d` in the character class would be redundant.
    # With the third-party `regex` package we could use \p{L}\p{N} instead,
    # but that is probably not worth the import.
    disallowed_run = r"[^\w" + re.escape(allowed_chars) + "]+"
    return re.sub(disallowed_run, " ", text).strip()
0 commit comments