From b5cacb09e3a8e9f04bbf92d179f9c2c96ea05011 Mon Sep 17 00:00:00 2001 From: Andrew Godwin Date: Sun, 13 Nov 2022 19:03:43 -0700 Subject: Permit Mastodon's weird HTML through --- activities/admin.py | 6 ++++++ activities/models/post.py | 15 +++++++++++++++ core/html.py | 24 ++++++++++++++++++++++-- templates/activities/_post.html | 2 +- users/views/activitypub.py | 2 +- 5 files changed, 45 insertions(+), 4 deletions(-) diff --git a/activities/admin.py b/activities/admin.py index d4603a4..947a596 100644 --- a/activities/admin.py +++ b/activities/admin.py @@ -7,6 +7,12 @@ from activities.models import FanOut, Post, PostInteraction, TimelineEvent class PostAdmin(admin.ModelAdmin): list_display = ["id", "state", "author", "created"] raw_id_fields = ["to", "mentions"] + actions = ["force_fetch"] + + @admin.action(description="Force Fetch") + def force_fetch(self, request, queryset): + for instance in queryset: + instance.debug_fetch() @admin.register(TimelineEvent) diff --git a/activities/models/post.py b/activities/models/post.py index d847307..74b335b 100644 --- a/activities/models/post.py +++ b/activities/models/post.py @@ -274,3 +274,18 @@ class Post(StatorModel): TimelineEvent.add_post(follow.source, post) # Force it into fanned_out as it's not ours post.transition_perform(PostStates.fanned_out) + + def debug_fetch(self): + """ + Fetches the Post from its original URL again and updates us with it + """ + response = httpx.get( + self.object_uri, + headers={"Accept": "application/json"}, + follow_redirects=True, + ) + if 200 <= response.status_code < 300: + return self.by_ap( + canonicalise(response.json(), include_security=True), + update=True, + ) diff --git a/core/html.py b/core/html.py index e63dda3..fd41a50 100644 --- a/core/html.py +++ b/core/html.py @@ -1,11 +1,31 @@ import bleach +from bleach.linkifier import LinkifyFilter from django.utils.safestring import mark_safe +def allow_a(tag: str, name: str, value: str): + if name in ["href", "title", "class"]: + return True + elif name == "rel": + # Only allow rel attributes with a small subset of values + # (we're defending against, for example, rel=me) + rel_values = value.split() + if all(v in ["nofollow", "noopener", "noreferrer", "tag"] for v in rel_values): + return True + return False + + def sanitize_post(post_html: str) -> str: """ Only allows a, br, p and span tags, and class attributes. """ - return mark_safe( - bleach.clean(post_html, tags=["a", "br", "p", "span"], attributes=["class"]) + cleaner = bleach.Cleaner( + tags=["a", "br", "p", "span"], + attributes={ # type:ignore + "a": allow_a, + "p": ["class"], + "span": ["class"], + }, + filters=[LinkifyFilter], ) + return mark_safe(cleaner.clean(post_html)) diff --git a/templates/activities/_post.html b/templates/activities/_post.html index 601466e..bf4b189 100644 --- a/templates/activities/_post.html +++ b/templates/activities/_post.html @@ -1,6 +1,6 @@ {% load static %} {% load activity_tags %} -
+
{% if post.author.icon_uri %} diff --git a/users/views/activitypub.py b/users/views/activitypub.py index 54f04bc..1a709ac 100644 --- a/users/views/activitypub.py +++ b/users/views/activitypub.py @@ -118,7 +118,7 @@ class Inbox(View): # See if we can fetch it right now async_to_sync(identity.fetch_actor)() if not identity.public_key: - print("Cannot get actor") + print("Cannot get actor", document["actor"]) return HttpResponseBadRequest("Cannot retrieve actor") # If there's a "signature" payload, verify against that if "signature" in document: -- cgit v1.2.3