From 93ccb5dd32f4e3942d08814e5a86fb30c90c9ff6 Mon Sep 17 00:00:00 2001 From: Andrew Godwin Date: Sun, 4 Dec 2022 21:13:33 -0700 Subject: Make search work with URLs --- activities/models/post.py | 36 ++++++++++++------- activities/views/search.py | 86 ++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 102 insertions(+), 20 deletions(-) (limited to 'activities') diff --git a/activities/models/post.py b/activities/models/post.py index aa4be16..f8a5e75 100644 --- a/activities/models/post.py +++ b/activities/models/post.py @@ -3,7 +3,7 @@ from typing import Dict, Iterable, List, Optional, Set import httpx import urlman -from asgiref.sync import sync_to_async +from asgiref.sync import async_to_sync, sync_to_async from django.contrib.postgres.indexes import GinIndex from django.db import models, transaction from django.template.defaultfilters import linebreaks_filter @@ -16,6 +16,7 @@ from core.html import sanitize_post, strip_html from core.ld import canonicalise, format_ld_date, get_list, parse_ld_date from stator.models import State, StateField, StateGraph, StatorModel from users.models.identity import Identity +from users.models.system_actor import SystemActor class PostStates(StateGraph): @@ -609,19 +610,28 @@ class Post(StatorModel): return cls.objects.get(object_uri=object_uri) except cls.DoesNotExist: if fetch: - # Go grab the data from the URI - response = httpx.get( - object_uri, - headers={"Accept": "application/json"}, - follow_redirects=True, - ) - if 200 <= response.status_code < 300: - return cls.by_ap( - canonicalise(response.json(), include_security=True), - create=True, - update=True, + try: + response = async_to_sync(SystemActor().signed_request)( + method="get", uri=object_uri + ) + except (httpx.RequestError, httpx.ConnectError): + raise cls.DoesNotExist(f"Could not fetch {object_uri}") + if response.status_code in [404, 410]: + raise cls.DoesNotExist(f"No post at {object_uri}") + if response.status_code >= 500: + raise cls.DoesNotExist(f"Server error fetching {object_uri}") + if response.status_code >= 400: + raise cls.DoesNotExist( + f"Error fetching post from {object_uri}: {response.status_code}", + {response.content}, ) - raise cls.DoesNotExist(f"Cannot find Post with URI {object_uri}") + return cls.by_ap( + canonicalise(response.json(), include_security=True), + create=True, + update=True, + ) + else: + raise cls.DoesNotExist(f"Cannot find Post with URI {object_uri}") @classmethod def handle_create_ap(cls, data): diff --git a/activities/views/search.py b/activities/views/search.py index 8bdef78..ab37e17 100644 --- a/activities/views/search.py +++ b/activities/views/search.py @@ -1,11 +1,14 @@ from typing import Set +import httpx from asgiref.sync import async_to_sync from django import forms from django.views.generic import FormView -from activities.models import Hashtag +from activities.models import Hashtag, Post +from core.ld import canonicalise from users.models import Domain, Identity, IdentityStates +from users.models.system_actor import SystemActor class Search(FormView): @@ -14,11 +17,20 @@ class Search(FormView): class form_class(forms.Form): query = forms.CharField( - help_text="Search for a user by @username@domain or hashtag by #tagname", + help_text="Search for:\nA user by @username@domain or their profile URL\nA hashtag by #tagname\nA post by its URL", widget=forms.TextInput(attrs={"type": "search", "autofocus": "autofocus"}), ) - def search_identities(self, query: str): + def search_identities_handle(self, query: str): + """ + Searches for identities by their handles + """ + + # Short circuit if it's obviously not for us + if "://" in query: + return set() + + # Try to fetch the user by handle query = query.lstrip("@") results: Set[Identity] = set() if "@" in query: @@ -52,12 +64,65 @@ class Search(FormView): results.add(identity) return results + def search_url(self, query: str) -> Post | Identity | None: + """ + Searches for an identity or post by URL. + """ + + # Short circuit if it's obviously not for us + if "://" not in query: + return None + + # Clean up query + query = query.strip() + + # Fetch the provided URL as the system actor to retrieve the AP JSON + try: + response = async_to_sync(SystemActor().signed_request)( + method="get", uri=query + ) + except (httpx.RequestError, httpx.ConnectError): + return None + if response.status_code >= 400: + return None + document = canonicalise(response.json(), include_security=True) + type = document.get("type", "unknown").lower() + + # Is it an identity? + if type == "person": + # Try and retrieve the profile by actor URI + identity = Identity.by_actor_uri(document["id"], create=True) + if identity and identity.state == IdentityStates.outdated: + async_to_sync(identity.fetch_actor)() + return identity + + # Is it a post? + elif type == "note": + # Try and retrieve the post by URI + # (we do not trust the JSON we just got - fetch from source!) + try: + post = Post.by_object_uri(document["id"], fetch=True) + # We may need to live-fetch the identity too + if post.author.state == IdentityStates.outdated: + async_to_sync(post.author.fetch_actor)() + return post + except Post.DoesNotExist: + return None + + # Dunno what it is + else: + return None + def search_hashtags(self, query: str): - results: Set[Hashtag] = set() + """ + Searches for hashtags by their name + """ - if "@" in query: - return results + # Short circuit out if it's obviously not a hashtag + if "@" in query or "://" in query: + return set() + results: Set[Hashtag] = set() query = query.lstrip("#") for hashtag in Hashtag.objects.public().hashtag_or_alias(query)[:10]: results.add(hashtag) @@ -68,10 +133,17 @@ class Search(FormView): def form_valid(self, form): query = form.cleaned_data["query"].lower() results = { - "identities": self.search_identities(query), + "identities": self.search_identities_handle(query), "hashtags": self.search_hashtags(query), + "posts": set(), } + url_result = self.search_url(query) + if isinstance(url_result, Identity): + results["identities"].add(url_result) + if isinstance(url_result, Post): + results["posts"].add(url_result) + # Render results context = self.get_context_data(form=form) context["results"] = results -- cgit v1.2.3