summary refs log tree commit diff stats
path: root/core
diff options
context:
space:
mode:
author Andrew Godwin 2022-11-13 19:03:43 -0700
committer Andrew Godwin 2022-11-13 19:03:43 -0700
commit b5cacb09e3a8e9f04bbf92d179f9c2c96ea05011 (patch)
tree f6a47114661122c4248e0132f1f5ba32f7ab9786 /core
parent ddb3436275d3f02183f515c38cd3193cd1dfe2f4 (diff)
download takahe-b5cacb09e3a8e9f04bbf92d179f9c2c96ea05011.tar.gz
takahe-b5cacb09e3a8e9f04bbf92d179f9c2c96ea05011.tar.bz2
takahe-b5cacb09e3a8e9f04bbf92d179f9c2c96ea05011.zip
Permit Mastodon's weird HTML through
Diffstat (limited to 'core')
-rw-r--r-- core/html.py 24
1 files changed, 22 insertions, 2 deletions
diff --git a/core/html.py b/core/html.py
index e63dda3..fd41a50 100644
--- a/core/html.py
+++ b/core/html.py
@@ -1,11 +1,31 @@
import bleach
+from bleach.linkifier import LinkifyFilter
from django.utils.safestring import mark_safe
+def allow_a(tag: str, name: str, value: str):
+ if name in ["href", "title", "class"]:
+ return True
+ elif name == "rel":
+ # Only allow rel attributes with a small subset of values
+ # (we're defending against, for example, rel=me)
+ rel_values = value.split()
+ if all(v in ["nofollow", "noopener", "noreferrer", "tag"] for v in rel_values):
+ return True
+ return False
+
+
def sanitize_post(post_html: str) -> str:
"""
Only allows a, br, p and span tags, and class attributes.
"""
- return mark_safe(
- bleach.clean(post_html, tags=["a", "br", "p", "span"], attributes=["class"])
+ cleaner = bleach.Cleaner(
+ tags=["a", "br", "p", "span"],
+ attributes={ # type:ignore
+ "a": allow_a,
+ "p": ["class"],
+ "span": ["class"],
+ },
+ filters=[LinkifyFilter],
)
+ return mark_safe(cleaner.clean(post_html))