From b5cacb09e3a8e9f04bbf92d179f9c2c96ea05011 Mon Sep 17 00:00:00 2001
From: Andrew Godwin
Date: Sun, 13 Nov 2022 19:03:43 -0700
Subject: Permit Mastodon's weird HTML through

---
 core/html.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

(limited to 'core/html.py')

diff --git a/core/html.py b/core/html.py
index e63dda3..fd41a50 100644
--- a/core/html.py
+++ b/core/html.py
@@ -1,11 +1,31 @@
 import bleach
+from bleach.linkifier import LinkifyFilter
 from django.utils.safestring import mark_safe
 
 
+def allow_a(tag: str, name: str, value: str):
+    if name in ["href", "title", "class"]:
+        return True
+    elif name == "rel":
+        # Only allow rel attributes with a small subset of values
+        # (we're defending against, for example, rel=me)
+        rel_values = value.split()
+        if all(v in ["nofollow", "noopener", "noreferrer", "tag"] for v in rel_values):
+            return True
+    return False
+
+
 def sanitize_post(post_html: str) -> str:
     """
     Only allows a, br, p and span tags, and class attributes.
     """
-    return mark_safe(
-        bleach.clean(post_html, tags=["a", "br", "p", "span"], attributes=["class"])
+    cleaner = bleach.Cleaner(
+        tags=["a", "br", "p", "span"],
+        attributes={  # type:ignore
+            "a": allow_a,
+            "p": ["class"],
+            "span": ["class"],
+        },
+        filters=[LinkifyFilter],
     )
+    return mark_safe(cleaner.clean(post_html))
-- 
cgit v1.2.3