diff options
author | Andrew Godwin | 2022-11-27 12:09:08 -0700 |
---|---|---|
committer | Andrew Godwin | 2022-11-27 12:09:08 -0700 |
commit | 8e9e3ecf6935db84bbc731252f592795675de685 (patch) | |
tree | eb3f690cdc7504b3f8fc2da716c1bbfc4bacc36c /core | |
parent | 6c7ddedd342553b53dd98c8de9cbe9e8e2e8cd7c (diff) | |
download | takahe-8e9e3ecf6935db84bbc731252f592795675de685.tar.gz takahe-8e9e3ecf6935db84bbc731252f592795675de685.tar.bz2 takahe-8e9e3ecf6935db84bbc731252f592795675de685.zip |
Some cleanup around editing
Diffstat (limited to 'core')
-rw-r--r-- | core/html.py | 12 |
1 files changed, 12 insertions, 0 deletions
```diff
diff --git a/core/html.py b/core/html.py
index 3230284..dfb7beb 100644
--- a/core/html.py
+++ b/core/html.py
@@ -38,3 +38,15 @@ def strip_html(post_html: str) -> str:
     """
     cleaner = bleach.Cleaner(tags=[], strip=True, filters=[LinkifyFilter])
     return mark_safe(cleaner.clean(post_html))
+
+
+def html_to_plaintext(post_html: str) -> str:
+    """
+    Tries to do the inverse of the linebreaks filter.
+    """
+    # TODO: Handle HTML entities
+    # Remove all newlines, then replace br with a newline and /p with two (one comes from bleach)
+    post_html = post_html.replace("\n", "").replace("<br>", "\n").replace("</p>", "\n")
+    # Remove all other HTML and return
+    cleaner = bleach.Cleaner(tags=[], strip=True, filters=[])
+    return cleaner.clean(post_html).strip()
```