From fb8f2d10984bcfa2585dc272b4c85d285b722792 Mon Sep 17 00:00:00 2001 From: Michael Manfre Date: Mon, 28 Nov 2022 23:41:36 -0500 Subject: Hashtags --- activities/admin.py | 25 ++++- activities/migrations/0002_hashtag.py | 51 +++++++++ activities/models/__init__.py | 1 + activities/models/hashtag.py | 187 +++++++++++++++++++++++++++++++ activities/models/post.py | 94 ++++++++++++---- activities/templatetags/activity_tags.py | 13 +++ activities/views/admin/__init__.py | 0 activities/views/explore.py | 26 +++++ activities/views/search.py | 43 +++++-- activities/views/timelines.py | 45 ++++++-- 10 files changed, 446 insertions(+), 39 deletions(-) create mode 100644 activities/migrations/0002_hashtag.py create mode 100644 activities/models/hashtag.py create mode 100644 activities/views/admin/__init__.py create mode 100644 activities/views/explore.py (limited to 'activities') diff --git a/activities/admin.py b/activities/admin.py index 8e29d22..c4875ca 100644 --- a/activities/admin.py +++ b/activities/admin.py @@ -1,7 +1,9 @@ +from asgiref.sync import async_to_sync from django.contrib import admin from activities.models import ( FanOut, + Hashtag, Post, PostAttachment, PostInteraction, @@ -9,6 +11,20 @@ from activities.models import ( ) +@admin.register(Hashtag) +class HashtagAdmin(admin.ModelAdmin): + list_display = ["hashtag", "name_override", "state", "stats_updated", "created"] + + readonly_fields = ["created", "updated", "stats_updated"] + + actions = ["force_execution"] + + @admin.action(description="Force Execution") + def force_execution(self, request, queryset): + for instance in queryset: + instance.transition_perform("outdated") + + class PostAttachmentInline(admin.StackedInline): model = PostAttachment extra = 0 @@ -18,7 +34,7 @@ class PostAttachmentInline(admin.StackedInline): class PostAdmin(admin.ModelAdmin): list_display = ["id", "state", "author", "created"] raw_id_fields = ["to", "mentions", "author"] - actions = ["force_fetch"] + actions = ["force_fetch", "reparse_hashtags"] search_fields = ["content"] inlines = [PostAttachmentInline] readonly_fields = ["created", "updated", "object_json"] @@ -28,6 +44,13 @@ class PostAdmin(admin.ModelAdmin): for instance in queryset: instance.debug_fetch() + @admin.action(description="Reprocess content for hashtags") + def reparse_hashtags(self, request, queryset): + for instance in queryset: + instance.hashtags = Hashtag.hashtags_from_content(instance.content) or None + instance.save() + async_to_sync(instance.ensure_hashtags)() + @admin.display(description="ActivityPub JSON") def object_json(self, instance): return instance.to_ap() diff --git a/activities/migrations/0002_hashtag.py b/activities/migrations/0002_hashtag.py new file mode 100644 index 0000000..468bd95 --- /dev/null +++ b/activities/migrations/0002_hashtag.py @@ -0,0 +1,51 @@ +# Generated by Django 4.1.3 on 2022-11-27 20:16 + +from django.db import migrations, models + +import activities.models.hashtag +import stator.models + + +class Migration(migrations.Migration): + + dependencies = [ + ("activities", "0001_initial"), + ] + + operations = [ + migrations.CreateModel( + name="Hashtag", + fields=[ + ("state_ready", models.BooleanField(default=True)), + ("state_changed", models.DateTimeField(auto_now_add=True)), + ("state_attempted", models.DateTimeField(blank=True, null=True)), + ("state_locked_until", models.DateTimeField(blank=True, null=True)), + ( + "hashtag", + models.SlugField(max_length=100, primary_key=True, serialize=False), + ), + ( + "name_override", + models.CharField(blank=True, max_length=100, null=True), + ), + ("public", models.BooleanField(null=True)), + ( + "state", + stator.models.StateField( + choices=[("outdated", "outdated"), ("updated", "updated")], + default="outdated", + graph=activities.models.hashtag.HashtagStates, + max_length=100, + ), + ), + ("stats", models.JSONField(blank=True, null=True)), + ("stats_updated", models.DateTimeField(blank=True, null=True)), + ("aliases", models.JSONField(blank=True, null=True)), + ("created", models.DateTimeField(auto_now_add=True)), + ("updated", models.DateTimeField(auto_now=True)), + ], + options={ + "abstract": False, + }, + ), + ] diff --git a/activities/models/__init__.py b/activities/models/__init__.py index 1ae3f4c..aa34c0f 100644 --- a/activities/models/__init__.py +++ b/activities/models/__init__.py @@ -1,4 +1,5 @@ from .fan_out import FanOut, FanOutStates # noqa +from .hashtag import Hashtag, HashtagStates # noqa from .post import Post, PostStates # noqa from .post_attachment import PostAttachment, PostAttachmentStates # noqa from .post_interaction import PostInteraction, PostInteractionStates # noqa diff --git a/activities/models/hashtag.py b/activities/models/hashtag.py new file mode 100644 index 0000000..a5754f7 --- /dev/null +++ b/activities/models/hashtag.py @@ -0,0 +1,187 @@ +import re +from datetime import date, timedelta +from typing import Dict, List + +import urlman +from asgiref.sync import sync_to_async +from django.db import models +from django.utils import timezone +from django.utils.safestring import mark_safe + +from core.models import Config +from stator.models import State, StateField, StateGraph, StatorModel + + +class HashtagStates(StateGraph): + outdated = State(try_interval=300, force_initial=True) + updated = State(try_interval=3600, attempt_immediately=False) + + outdated.transitions_to(updated) + updated.transitions_to(outdated) + + @classmethod + async def handle_outdated(cls, instance: "Hashtag"): + """ + Computes the stats and other things for a Hashtag + """ + from .post import Post + + posts_query = Post.objects.local_public().tagged_with(instance) + total = await posts_query.acount() + + today = timezone.now().date() + # TODO: single query + total_today = await posts_query.filter( + created__gte=today, + created__lte=today + timedelta(days=1), + ).acount() + total_month = await posts_query.filter( + created__year=today.year, + created__month=today.month, + ).acount() + total_year = await posts_query.filter( + created__year=today.year, + ).acount() + if total: + if not instance.stats: + instance.stats = {} + instance.stats.update( + { + "total": total, + today.isoformat(): total_today, + today.strftime("%Y-%m"): total_month, + today.strftime("%Y"): total_year, + } + ) + instance.stats_updated = timezone.now() + await sync_to_async(instance.save)() + + return cls.updated + + @classmethod + async def handle_updated(cls, instance: "Hashtag"): + if instance.state_age > Config.system.hashtag_stats_max_age: + return cls.outdated + + +class HashtagQuerySet(models.QuerySet): + def public(self): + public_q = models.Q(public=True) + if Config.system.hashtag_unreviewed_are_public: + public_q |= models.Q(public__isnull=True) + return self.filter(public_q) + + def hashtag_or_alias(self, hashtag: str): + return self.filter( + models.Q(hashtag=hashtag) | models.Q(aliases__contains=hashtag) + ) + + +class HashtagManager(models.Manager): + def get_queryset(self): + return HashtagQuerySet(self.model, using=self._db) + + def public(self): + return self.get_queryset().public() + + def hashtag_or_alias(self, hashtag: str): + return self.get_queryset().hashtag_or_alias(hashtag) + + +class Hashtag(StatorModel): + + # Normalized hashtag without the '#' + hashtag = models.SlugField(primary_key=True, max_length=100) + + # Friendly display override + name_override = models.CharField(max_length=100, null=True, blank=True) + + # Should this be shown in the public UI? + public = models.BooleanField(null=True) + + # State of this Hashtag + state = StateField(HashtagStates) + + # Metrics for this Hashtag + stats = models.JSONField(null=True, blank=True) + # Timestamp of last time the stats were updated + stats_updated = models.DateTimeField(null=True, blank=True) + + # List of other hashtags that are considered similar + aliases = models.JSONField(null=True, blank=True) + + created = models.DateTimeField(auto_now_add=True) + updated = models.DateTimeField(auto_now=True) + + objects = HashtagManager() + + class urls(urlman.Urls): + root = "/admin/hashtags/" + create = "/admin/hashtags/create/" + edit = "/admin/hashtags/{self.hashtag}/" + delete = "{edit}delete/" + timeline = "/tags/{self.hashtag}/" + + hashtag_regex = re.compile(r"((?:\B#)([a-zA-Z0-9(_)]{1,}\b))") + + def save(self, *args, **kwargs): + self.hashtag = self.hashtag.lstrip("#") + if self.name_override: + self.name_override = self.name_override.lstrip("#") + return super().save(*args, **kwargs) + + @property + def display_name(self): + return self.name_override or self.hashtag + + def __str__(self): + return self.display_name + + def usage_months(self, num: int = 12) -> Dict[date, int]: + """ + Return the most recent num months of stats + """ + if not self.stats: + return {} + results = {} + for key, val in self.stats.items(): + parts = key.split("-") + if len(parts) == 2: + year = int(parts[0]) + month = int(parts[1]) + results[date(year, month, 1)] = val + return dict(sorted(results.items(), reverse=True)[:num]) + + def usage_days(self, num: int = 7) -> Dict[date, int]: + """ + Return the most recent num days of stats + """ + if not self.stats: + return {} + results = {} + for key, val in self.stats.items(): + parts = key.split("-") + if len(parts) == 3: + year = int(parts[0]) + month = int(parts[1]) + day = int(parts[2]) + results[date(year, month, day)] = val + return dict(sorted(results.items(), reverse=True)[:num]) + + @classmethod + def hashtags_from_content(cls, content) -> List[str]: + """ + Return a parsed and sanitized of hashtags found in content without + leading '#'. + """ + hashtag_hits = cls.hashtag_regex.findall(content) + hashtags = sorted({tag[1].lower() for tag in hashtag_hits}) + return list(hashtags) + + @classmethod + def linkify_hashtags(cls, content) -> str: + def replacer(match): + hashtag = match.group() + return f'{hashtag}' + + return mark_safe(Hashtag.hashtag_regex.sub(replacer, content)) diff --git a/activities/models/post.py b/activities/models/post.py index f504fcb..b61abd4 100644 --- a/activities/models/post.py +++ b/activities/models/post.py @@ -10,6 +10,7 @@ from django.utils import timezone from django.utils.safestring import mark_safe from activities.models.fan_out import FanOut +from activities.models.hashtag import Hashtag from core.html import sanitize_post, strip_html from core.ld import canonicalise, format_ld_date, get_list, parse_ld_date from stator.models import State, StateField, StateGraph, StatorModel @@ -35,18 +36,23 @@ class PostStates(StateGraph): edited_fanned_out.transitions_to(deleted) @classmethod - async def handle_new(cls, instance: "Post"): - """ - Creates all needed fan-out objects for a new Post. - """ - post = await instance.afetch_full() + async def targets_fan_out(cls, post: "Post", type_: str) -> None: # Fan out to each target for follow in await post.aget_targets(): await FanOut.objects.acreate( identity=follow, - type=FanOut.Types.post, + type=type_, subject_post=post, ) + + @classmethod + async def handle_new(cls, instance: "Post"): + """ + Creates all needed fan-out objects for a new Post. + """ + post = await instance.afetch_full() + await cls.targets_fan_out(post, FanOut.Types.post) + await post.ensure_hashtags() return cls.fanned_out @classmethod @@ -55,13 +61,7 @@ class PostStates(StateGraph): Creates all needed fan-out objects needed to delete a Post. """ post = await instance.afetch_full() - # Fan out to each target - for follow in await post.aget_targets(): - await FanOut.objects.acreate( - identity=follow, - type=FanOut.Types.post_deleted, - subject_post=post, - ) + await cls.targets_fan_out(post, FanOut.Types.post_deleted) return cls.deleted_fanned_out @classmethod @@ -70,16 +70,46 @@ class PostStates(StateGraph): Creates all needed fan-out objects for an edited Post. """ post = await instance.afetch_full() - # Fan out to each target - for follow in await post.aget_targets(): - await FanOut.objects.acreate( - identity=follow, - type=FanOut.Types.post_edited, - subject_post=post, - ) + await cls.targets_fan_out(post, FanOut.Types.post_edited) + await post.ensure_hashtags() return cls.edited_fanned_out +class PostQuerySet(models.QuerySet): + def local_public(self, include_replies: bool = False): + query = self.filter( + visibility__in=[ + Post.Visibilities.public, + Post.Visibilities.local_only, + ], + author__local=True, + ) + if not include_replies: + return query.filter(in_reply_to__isnull=True) + return query + + def tagged_with(self, hashtag: str | Hashtag): + if isinstance(hashtag, str): + tag_q = models.Q(hashtags__contains=hashtag) + else: + tag_q = models.Q(hashtags__contains=hashtag.hashtag) + if hashtag.aliases: + for alias in hashtag.aliases: + tag_q |= models.Q(hashtags__contains=alias) + return self.filter(tag_q) + + +class PostManager(models.Manager): + def get_queryset(self): + return PostQuerySet(self.model, using=self._db) + + def local_public(self, include_replies: bool = False): + return self.get_queryset().local_public(include_replies=include_replies) + + def tagged_with(self, hashtag: str | Hashtag): + return self.get_queryset().tagged_with(hashtag=hashtag) + + class Post(StatorModel): """ A post (status, toot) that is either local or remote. @@ -155,6 +185,8 @@ class Post(StatorModel): created = models.DateTimeField(auto_now_add=True) updated = models.DateTimeField(auto_now=True) + objects = PostManager() + class urls(urlman.Urls): view = "{self.author.urls.view}posts/{self.id}/" object_uri = "{self.author.actor_uri}posts/{self.id}/" @@ -236,7 +268,9 @@ class Post(StatorModel): """ Returns the content formatted for local display """ - return self.linkify_mentions(sanitize_post(self.content), local=True) + return Hashtag.linkify_hashtags( + self.linkify_mentions(sanitize_post(self.content), local=True) + ) def safe_content_remote(self): """ @@ -252,7 +286,7 @@ class Post(StatorModel): ### Async helpers ### - async def afetch_full(self): + async def afetch_full(self) -> "Post": """ Returns a version of the object with all relations pre-loaded """ @@ -281,6 +315,8 @@ class Post(StatorModel): # Maintain local-only for replies if reply_to.visibility == reply_to.Visibilities.local_only: visibility = reply_to.Visibilities.local_only + # Find hashtags in this post + hashtags = Hashtag.hashtags_from_content(content) or None # Strip all HTML and apply linebreaks filter content = linebreaks_filter(strip_html(content)) # Make the Post object @@ -291,6 +327,7 @@ class Post(StatorModel): sensitive=bool(summary), local=True, visibility=visibility, + hashtags=hashtags, in_reply_to=reply_to.object_uri if reply_to else None, ) post.object_uri = post.urls.object_uri @@ -312,6 +349,7 @@ class Post(StatorModel): self.sensitive = bool(summary) self.visibility = visibility self.edited = timezone.now() + self.hashtags = Hashtag.hashtags_from_content(content) or None self.mentions.set(self.mentions_from_content(content, self.author)) self.save() @@ -334,6 +372,18 @@ class Post(StatorModel): mentions.add(identity) return mentions + async def ensure_hashtags(self) -> None: + """ + Ensure any of the already parsed hashtags from this Post + have a corresponding Hashtag record. + """ + # Ensure hashtags + if self.hashtags: + for hashtag in self.hashtags: + await Hashtag.objects.aget_or_create( + hashtag=hashtag, + ) + ### ActivityPub (outbound) ### def to_ap(self) -> Dict: diff --git a/activities/templatetags/activity_tags.py b/activities/templatetags/activity_tags.py index 571e2d6..fb822f6 100644 --- a/activities/templatetags/activity_tags.py +++ b/activities/templatetags/activity_tags.py @@ -3,6 +3,8 @@ import datetime from django import template from django.utils import timezone +from activities.models import Hashtag + register = template.Library() @@ -31,3 +33,14 @@ def timedeltashort(value: datetime.datetime): years = max(days // 365.25, 1) text = f"{years:0n}y" return text + + +@register.filter +def linkify_hashtags(value: str): + """ + Convert hashtags in content in to /tags// links. + """ + if not value: + return "" + + return Hashtag.linkify_hashtags(value) diff --git a/activities/views/admin/__init__.py b/activities/views/admin/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/activities/views/explore.py b/activities/views/explore.py new file mode 100644 index 0000000..ddb1e6c --- /dev/null +++ b/activities/views/explore.py @@ -0,0 +1,26 @@ +from django.views.generic import ListView + +from activities.models import Hashtag + + +class ExploreTag(ListView): + + template_name = "activities/explore_tag.html" + extra_context = { + "current_page": "explore", + "allows_refresh": True, + } + paginate_by = 20 + + def get_queryset(self): + return ( + Hashtag.objects.public() + .filter( + stats__total__gt=0, + ) + .order_by("-stats__total") + )[:20] + + +class Explore(ExploreTag): + pass diff --git a/activities/views/search.py b/activities/views/search.py index b175052..4719f64 100644 --- a/activities/views/search.py +++ b/activities/views/search.py @@ -1,6 +1,9 @@ +from typing import Set + from django import forms from django.views.generic import FormView +from activities.models import Hashtag from users.models import Domain, Identity @@ -9,13 +12,13 @@ class Search(FormView): template_name = "activities/search.html" class form_class(forms.Form): - query = forms.CharField(help_text="Search for a user by @username@domain") - - def form_valid(self, form): - query = form.cleaned_data["query"].lstrip("@").lower() - results = {"identities": set()} - # Search identities + query = forms.CharField( + help_text="Search for a user by @username@domain or hashtag by #tagname" + ) + def search_identities(self, query: str): + query = query.lstrip("@") + results: Set[Identity] = set() if "@" in query: username, domain = query.split("@", 1) @@ -35,13 +38,35 @@ class Search(FormView): ) identity = None if identity: - results["identities"].add(identity) + results.add(identity) else: for identity in Identity.objects.filter(username=query)[:20]: - results["identities"].add(identity) + results.add(identity) for identity in Identity.objects.filter(username__startswith=query)[:20]: - results["identities"].add(identity) + results.add(identity) + return results + + def search_hashtags(self, query: str): + results: Set[Hashtag] = set() + + if "@" in query: + return results + + query = query.lstrip("#") + for hashtag in Hashtag.objects.public().hashtag_or_alias(query)[:10]: + results.add(hashtag) + for hashtag in Hashtag.objects.public().filter(hashtag__startswith=query)[:10]: + results.add(hashtag) + return results + + def form_valid(self, form): + query = form.cleaned_data["query"].lower() + results = { + "identities": self.search_identities(query), + "hashtags": self.search_hashtags(query), + } + # Render results context = self.get_context_data(form=form) context["results"] = results diff --git a/activities/views/timelines.py b/activities/views/timelines.py index 4f2a515..ffe329c 100644 --- a/activities/views/timelines.py +++ b/activities/views/timelines.py @@ -1,10 +1,10 @@ from django import forms -from django.shortcuts import redirect +from django.shortcuts import get_object_or_404, redirect from django.template.defaultfilters import linebreaks_filter from django.utils.decorators import method_decorator from django.views.generic import FormView, ListView -from activities.models import Post, PostInteraction, TimelineEvent +from activities.models import Hashtag, Post, PostInteraction, TimelineEvent from core.models import Config from users.decorators import identity_required @@ -61,6 +61,41 @@ class Home(FormView): return redirect(".") +class Tag(ListView): + + template_name = "activities/tag.html" + extra_context = { + "current_page": "tag", + "allows_refresh": True, + } + paginate_by = 50 + + def get(self, request, hashtag, *args, **kwargs): + tag = hashtag.lower().lstrip("#") + if hashtag != tag: + # SEO sanitize + return redirect(f"/tags/{tag}/", permanent=True) + self.hashtag = get_object_or_404(Hashtag.objects.public(), hashtag=tag) + return super().get(request, *args, **kwargs) + + def get_queryset(self): + return ( + Post.objects.local_public() + .tagged_with(self.hashtag) + .select_related("author") + .prefetch_related("attachments") + .order_by("-created")[:50] + ) + + def get_context_data(self): + context = super().get_context_data() + context["hashtag"] = self.hashtag + context["interactions"] = PostInteraction.get_post_interactions( + context["page_obj"], self.request.identity + ) + return context + + class Local(ListView): template_name = "activities/local.html" @@ -72,11 +107,7 @@ class Local(ListView): def get_queryset(self): return ( - Post.objects.filter( - visibility=Post.Visibilities.public, - author__local=True, - in_reply_to__isnull=True, - ) + Post.objects.local_public() .select_related("author") .prefetch_related("attachments") .order_by("-created")[:50] -- cgit v1.2.3