jlevy
diff --git a/‎README.md‎
Lines changed: 27 additions & 1 deletion b/‎README.md‎
Lines changed: 27 additions & 1 deletion
diff --git a/‎src/prettyfmt/__init__.py‎
Lines changed: 9 additions & 3 deletions b/‎src/prettyfmt/__init__.py‎
Lines changed: 9 additions & 3 deletions
diff --git a/‎src/prettyfmt/prettyfmt.py‎
Lines changed: 29 additions & 25 deletions b/‎src/prettyfmt/prettyfmt.py‎
Lines changed: 29 additions & 25 deletions
diff --git a/‎tests/test_prettyfmt.py‎
Lines changed: 18 additions & 9 deletions b/‎tests/test_prettyfmt.py‎
Lines changed: 18 additions & 9 deletions
@@ -9,21 +9,47 @@ strings, timestamps, ages, and sizes.
 
 Simply a more convenient wrapper around `humanize`, `humanfriendly`, and `strif`.
 
+## Installation
+
+```
+# Use pip
+pip install prettyfmt
+# Or poetry
+poetry add prettyfmt
+```
+
+## Usage
+
 ```python
 from prettyfmt import *
 
+abbrev_str("very " * 100 + "long", 32)
+🢂 'very very very very very very v…'
+
 # Simple abbreviations of objects:
 abbrev_obj({"a": "very " * 100 + "long", "b": 23})
 🢂 "{a='very very very very very very very very very very very very ver…', b=23}"
 
+abbrev_obj(["word " * i for i in range(10)], field_max_len=10, list_max_len=4)
+🢂 "['', 'word ', 'word word ', 'word word…', …]"
+
 # Abbreviate but don't break words. Combine with slugifiers.
 abbrev_on_words("very " * 100 + "long", 30)
 🢂 'very very very very very very…'
 
 # My favorite, very good for abbreviating a long title to get a shorter one,
 # or good filename.
 abbrev_phrase_in_middle("very " * 100 + "long", 40)
-🢂 'very very very very … very very very long'
+🢂 'very very very very … very very very long'
+
+# Useful for cleaning up document titles and filenames.
+ugly_title = "A  Very\tVery Very Needlessly Long  {Strange} Document Title [final edited draft23]"
+abbrev_phrase_in_middle(sanitize_title(ugly_title))
+🢂 'A Very Very Very Needlessly Long Strange … final edited draft23'
+
+from slugify import slugify
+slugify(abbrev_phrase_in_middle(sanitize_title(ugly_title)))
+🢂 'a-very-very-very-needlessly-long-strange-final-edited-draft23'
 
 # Ages in seconds or deltas.
 fmt_age(60 * 60 * 24 * 23)
 
@@ -1,14 +1,20 @@
+from .prettyfmt import *  # noqa: F403
+from strif import abbrev_list, abbrev_str, quote_if_needed, single_line
+
 __all__ = (  # noqa: F405
     "abbrev_obj",
     "abbrev_on_words",
     "abbrev_phrase_in_middle",
-    "abbrev_str",
     "fmt_age",
     "fmt_time",
     "fmt_size_human",
     "fmt_size_dual",
     "fmt_words",
     "fmt_paras",
+    "sanitize_title",
+    # Re-export strif functions for convenience:
+    "abbrev_str",
+    "abbrev_list",
+    "single_line",
+    "quote_if_needed",
 )
-
-from .prettyfmt import *  # noqa: F403
 
@@ -128,15 +128,11 @@ def __str__(self) -> str:
     visited.add(id(value))
 
     if isinstance(value, list):
-        truncated_list = value[:list_max_len] + (
-            ["…"] if len(value) > list_max_len else []
-        )
+        truncated_list = value[:list_max_len] + (["…"] if len(value) > list_max_len else [])
         return (
             "["
             + ", ".join(
-                abbrev_obj(
-                    item, field_max_len, list_max_len, key_filter, value_filter, visited
-                )
+                abbrev_obj(item, field_max_len, list_max_len, key_filter, value_filter, visited)
                 for item in truncated_list
             )
             + "]"
@@ -152,11 +148,7 @@ def __str__(self) -> str:
         )
 
     if isinstance(value, dict):
-        return (
-            "{"
-            + _format_kvs(value.items(), field_max_len, key_filter, value_filter)
-            + "}"
-        )
+        return "{" + _format_kvs(value.items(), field_max_len, key_filter, value_filter) + "}"
 
     if isinstance(value, Enum):
         return value.name
@@ -168,10 +160,11 @@ def _trim_trailing_punctuation(text: str) -> str:
     return re.sub(r"[.,;:!?]+$", "", text)
 
 
-def abbrev_on_words(text: str, max_len: int, indicator: str = "…") -> str:
+def abbrev_on_words(text: str, max_len: int = 64, indicator: str = "…") -> str:
     """
-    Abbreviate text to a maximum length, breaking on whole words (unless the first word
-    is too long). For aesthetics, removes trailing punctuation from the last word.
+    Abbreviate text to a maximum character length, breaking on whole words
+    (unless the first word is too long). For aesthetics, removes trailing
+    punctuation from the last word.
     """
     if len(text) <= max_len:
         return text
@@ -180,21 +173,19 @@ def abbrev_on_words(text: str, max_len: int, indicator: str = "…") -> str:
     if words and max_len and len(words[0]) > max_len:
         return abbrev_str(words[0], max_len, indicator)
 
-    while (
-        words
-        and len(_trim_trailing_punctuation(" ".join(words))) + len(indicator) > max_len
-    ):
+    while words and len(_trim_trailing_punctuation(" ".join(words))) + len(indicator) > max_len:
         words.pop()
 
     return _trim_trailing_punctuation(" ".join(words)) + indicator
 
 
 def abbrev_phrase_in_middle(
-    phrase: str, max_len: int, ellipsis="…", max_trailing_len: int = 0
+    phrase: str, max_len: int = 64, ellipsis="…", max_trailing_len: int = 0
 ) -> str:
     """
-    Abbreviate a phrase to a maximum length, preserving the first and last few words of
-    the phrase whenever possible. The ellipsis is inserted in the middle of the phrase.
+    Abbreviate a phrase to a maximum character length, preserving the first and last
+    few words of the phrase whenever possible. The ellipsis is inserted in the middle
+    of the phrase.
     """
     if not max_trailing_len:
         max_trailing_len = min(int(max_len / 2), max(16, int(max_len / 4)))
@@ -214,10 +205,7 @@ def abbrev_phrase_in_middle(
     # Walk through the split words, and tally total number of chars as we go.
     for i in range(len(words)):
         words[i] = abbrev_str(words[i], max_len, ellipsis)
-        if (
-            prefix_tally + len(words[i]) + len(ellipsis) + max_trailing_len >= max_len
-            and i > 0
-        ):
+        if prefix_tally + len(words[i]) + len(ellipsis) + max_trailing_len >= max_len and i > 0:
             prefix_end_index = i
             break
         prefix_tally += len(words[i]) + 1
@@ -377,3 +365,19 @@ def fmt_paras(*paras: str | None, sep: str = "\n\n") -> str:
     """
     filtered_paras = [para.strip() for para in paras if para is not None]
     return sep.join(para for para in filtered_paras if para)
+
+
+DEFAULT_PUNCTUATION = ",./:;'!?/@%&()+“”‘’…–—-"
+
+
+def sanitize_title(text: str, allowed_chars: str = DEFAULT_PUNCTUATION) -> str:
+    """
+    Sanitize arbitrary text for use as a title or filename: each run of characters that are
+    not letters, digits, `_`, or in `allowed_chars` (so all whitespace, Markdown chars like
+    `*` or `[]`, code characters, etc.) becomes one space, and the ends are stripped.
+    """
+    # Note \w and \d should now be pretty good for common Unicode letters and digits; the
+    # regex package's \p{L}\p{N} could replace \w\d but is probably not worth the import.
+    # NOTE(review): the default set keeps `/` and `:`, which are risky in filenames; confirm.
+    escaped_chars = re.escape(allowed_chars)
+    return re.sub(r"[^\w\d" + escaped_chars + "]+", " ", text).strip()
@@ -1,4 +1,9 @@
-from prettyfmt import abbrev_on_words, abbrev_phrase_in_middle, fmt_words
+from prettyfmt import (
+    abbrev_on_words,
+    abbrev_phrase_in_middle,
+    fmt_words,
+    sanitize_title,
+)
 
 
 def test_abbreviate_on_words():
@@ -16,17 +21,12 @@ def test_abbreviate_on_words():
 
 
 def test_abbreviate_phrase_in_middle():
+    assert abbrev_phrase_in_middle("Hello, World! This is a test.", 16) == "Hello, … a test."
     assert (
-        abbrev_phrase_in_middle("Hello, World! This is a test.", 16)
-        == "Hello, … a test."
+        abbrev_phrase_in_middle("Hello, World! This is a test.", 23) == "Hello, … This is a test."
     )
     assert (
-        abbrev_phrase_in_middle("Hello, World! This is a test.", 23)
-        == "Hello, … This is a test."
-    )
-    assert (
-        abbrev_phrase_in_middle("Hello, World! This is a test.", 27)
-        == "Hello, … This is a test."
+        abbrev_phrase_in_middle("Hello, World! This is a test.", 27) == "Hello, … This is a test."
     )
     assert (
         abbrev_phrase_in_middle("Hello, World! This is a test.", 40)
@@ -69,3 +69,12 @@ def test_fmt_words():
     assert fmt_words("Hello", "   ", "World", sep="---") == "Hello---   ---World"
     assert fmt_words("Hello", "World", sep=" | ") == "Hello | World"
     assert fmt_words(" Hello ", " ", " World ") == " Hello World "
+
+
+def test_sanitize_title():  # allowed text is kept; any other run becomes one space
+    assert sanitize_title("Hello, World!") == "Hello, World!"  # common punctuation is kept
+    assert sanitize_title("Hej, Världen!") == "Hej, Världen!"  # non-ASCII letters are kept
+    assert sanitize_title("你好 世界") == "你好 世界"
+    assert sanitize_title("こんにちは、世界") == "こんにちは 世界"  # "、" is not in the allowed set
+    assert sanitize_title(" *Hello,*  \n\tWorld!  --123@:': ") == "Hello, World! --123@:':"
+    assert sanitize_title("<script foo='blah'><p>") == "script foo 'blah' p"  # markup removed