From fb8f2d10984bcfa2585dc272b4c85d285b722792 Mon Sep 17 00:00:00 2001
From: Michael Manfre
Date: Mon, 28 Nov 2022 23:41:36 -0500
Subject: Hashtags

---
 activities/models/__init__.py |   1 +
 activities/models/hashtag.py  | 230 ++++++++++++++++++++++++++++++++++++++++++
 activities/models/post.py     |  94 ++++++++++++++++-----
 3 files changed, 303 insertions(+), 22 deletions(-)
 create mode 100644 activities/models/hashtag.py

(limited to 'activities/models')

diff --git a/activities/models/__init__.py b/activities/models/__init__.py
index 1ae3f4c..aa34c0f 100644
--- a/activities/models/__init__.py
+++ b/activities/models/__init__.py
@@ -1,4 +1,5 @@
 from .fan_out import FanOut, FanOutStates  # noqa
+from .hashtag import Hashtag, HashtagStates  # noqa
 from .post import Post, PostStates  # noqa
 from .post_attachment import PostAttachment, PostAttachmentStates  # noqa
 from .post_interaction import PostInteraction, PostInteractionStates  # noqa
diff --git a/activities/models/hashtag.py b/activities/models/hashtag.py
new file mode 100644
index 0000000..a5754f7
--- /dev/null
+++ b/activities/models/hashtag.py
@@ -0,0 +1,230 @@
+import re
+from datetime import date, timedelta
+from typing import Dict, List
+
+import urlman
+from asgiref.sync import sync_to_async
+from django.db import models
+from django.utils import timezone
+from django.utils.safestring import mark_safe
+
+from core.models import Config
+from stator.models import State, StateField, StateGraph, StatorModel
+
+
+class HashtagStates(StateGraph):
+    """
+    A Hashtag alternates between outdated (stats need recomputing) and
+    updated.
+    """
+
+    outdated = State(try_interval=300, force_initial=True)
+    updated = State(try_interval=3600, attempt_immediately=False)
+
+    outdated.transitions_to(updated)
+    updated.transitions_to(outdated)
+
+    @classmethod
+    async def handle_outdated(cls, instance: "Hashtag"):
+        """
+        Computes the stats and other things for a Hashtag.
+
+        Counts the local public posts tagged with this hashtag or one of
+        its aliases: overall, today, this month and this year. The stats
+        are only written when the overall count is non-zero; the next
+        state is always updated.
+        """
+        from .post import Post
+
+        posts_query = Post.objects.local_public().tagged_with(instance)
+        total = await posts_query.acount()
+
+        today = timezone.now().date()
+        # TODO: single query
+        # Half-open range: midnight of the next day belongs to tomorrow
+        total_today = await posts_query.filter(
+            created__gte=today,
+            created__lt=today + timedelta(days=1),
+        ).acount()
+        total_month = await posts_query.filter(
+            created__year=today.year,
+            created__month=today.month,
+        ).acount()
+        total_year = await posts_query.filter(
+            created__year=today.year,
+        ).acount()
+        if total:
+            if not instance.stats:
+                instance.stats = {}
+            instance.stats.update(
+                {
+                    "total": total,
+                    today.isoformat(): total_today,
+                    today.strftime("%Y-%m"): total_month,
+                    today.strftime("%Y"): total_year,
+                }
+            )
+            instance.stats_updated = timezone.now()
+            await sync_to_async(instance.save)()
+
+        return cls.updated
+
+    @classmethod
+    async def handle_updated(cls, instance: "Hashtag"):
+        """
+        Returns the outdated state once state_age exceeds
+        Config.system.hashtag_stats_max_age, and None otherwise.
+        """
+        if instance.state_age > Config.system.hashtag_stats_max_age:
+            return cls.outdated
+
+
+class HashtagQuerySet(models.QuerySet):
+    def public(self):
+        """
+        Hashtags flagged public, plus unreviewed ones (public is NULL) when
+        Config.system.hashtag_unreviewed_are_public is set.
+        """
+        public_q = models.Q(public=True)
+        if Config.system.hashtag_unreviewed_are_public:
+            public_q |= models.Q(public__isnull=True)
+        return self.filter(public_q)
+
+    def hashtag_or_alias(self, hashtag: str):
+        """
+        Hashtags named hashtag or listing it among their aliases.
+        """
+        return self.filter(
+            models.Q(hashtag=hashtag) | models.Q(aliases__contains=hashtag)
+        )
+
+
+class HashtagManager(models.Manager):
+    """
+    Exposes the HashtagQuerySet filters directly on the manager.
+    """
+
+    def get_queryset(self):
+        return HashtagQuerySet(self.model, using=self._db)
+
+    def public(self):
+        return self.get_queryset().public()
+
+    def hashtag_or_alias(self, hashtag: str):
+        return self.get_queryset().hashtag_or_alias(hashtag)
+
+
+class Hashtag(StatorModel):
+    """
+    A hashtag, with its display and visibility settings and usage stats.
+    """
+
+    # Normalized hashtag without the '#'
+    hashtag = models.SlugField(primary_key=True, max_length=100)
+
+    # Friendly display override
+    name_override = models.CharField(max_length=100, null=True, blank=True)
+
+    # Should this be shown in the public UI?
+    public = models.BooleanField(null=True)
+
+    # State of this Hashtag
+    state = StateField(HashtagStates)
+
+    # Metrics for this Hashtag
+    stats = models.JSONField(null=True, blank=True)
+    # Timestamp of last time the stats were updated
+    stats_updated = models.DateTimeField(null=True, blank=True)
+
+    # List of other hashtags that are considered similar
+    aliases = models.JSONField(null=True, blank=True)
+
+    created = models.DateTimeField(auto_now_add=True)
+    updated = models.DateTimeField(auto_now=True)
+
+    objects = HashtagManager()
+
+    class urls(urlman.Urls):
+        root = "/admin/hashtags/"
+        create = "/admin/hashtags/create/"
+        edit = "/admin/hashtags/{self.hashtag}/"
+        delete = "{edit}delete/"
+        timeline = "/tags/{self.hashtag}/"
+
+    # Group 1 is the hashtag with its '#', group 2 the bare tag. Parentheses
+    # are literal inside a character class, so they are left out, and (?<!&)
+    # keeps character references such as &#39; from being read as hashtags.
+    hashtag_regex = re.compile(r"((?:(?<!&)\B#)([a-zA-Z0-9_]{1,}\b))")
+
+    def save(self, *args, **kwargs):
+        """
+        Strips leading '#' from hashtag and name_override before saving.
+        """
+        self.hashtag = self.hashtag.lstrip("#")
+        if self.name_override:
+            self.name_override = self.name_override.lstrip("#")
+        return super().save(*args, **kwargs)
+
+    @property
+    def display_name(self):
+        return self.name_override or self.hashtag
+
+    def __str__(self):
+        return self.display_name
+
+    def usage_months(self, num: int = 12) -> Dict[date, int]:
+        """
+        Return the most recent num months of stats, newest first, keyed by
+        the first day of each month (taken from the "YYYY-MM" stats keys).
+        """
+        if not self.stats:
+            return {}
+        results = {}
+        for key, val in self.stats.items():
+            parts = key.split("-")
+            if len(parts) == 2:
+                year = int(parts[0])
+                month = int(parts[1])
+                results[date(year, month, 1)] = val
+        return dict(sorted(results.items(), reverse=True)[:num])
+
+    def usage_days(self, num: int = 7) -> Dict[date, int]:
+        """
+        Return the most recent num days of stats, newest first (taken from
+        the "YYYY-MM-DD" stats keys).
+        """
+        if not self.stats:
+            return {}
+        results = {}
+        for key, val in self.stats.items():
+            parts = key.split("-")
+            if len(parts) == 3:
+                year = int(parts[0])
+                month = int(parts[1])
+                day = int(parts[2])
+                results[date(year, month, day)] = val
+        return dict(sorted(results.items(), reverse=True)[:num])
+
+    @classmethod
+    def hashtags_from_content(cls, content) -> List[str]:
+        """
+        Return a parsed and sanitized list of the hashtags found in content,
+        without leading '#': lowercased, de-duplicated and sorted.
+        """
+        hashtag_hits = cls.hashtag_regex.findall(content)
+        return sorted({tag[1].lower() for tag in hashtag_hits})
+
+    @classmethod
+    def linkify_hashtags(cls, content) -> str:
+        """
+        Wrap every hashtag in content in a link to its tag timeline.
+
+        The result is marked safe, so content must already be sanitized.
+        """
+
+        def replacer(match):
+            # Link to the lowercased tag, which is how parsed tags are stored
+            tag = match.group(2).lower()
+            return f'<a class="hashtag" href="/tags/{tag}/">{match.group()}</a>'
+
+        return mark_safe(cls.hashtag_regex.sub(replacer, content))
diff --git a/activities/models/post.py b/activities/models/post.py
index f504fcb..b61abd4 100644
--- a/activities/models/post.py
+++ b/activities/models/post.py
@@ -10,6 +10,7 @@ from django.utils import timezone
 from django.utils.safestring import mark_safe
 
 from activities.models.fan_out import FanOut
+from activities.models.hashtag import Hashtag
 from core.html import sanitize_post, strip_html
 from core.ld import canonicalise, format_ld_date, get_list, parse_ld_date
 from stator.models import State, StateField, StateGraph, StatorModel
@@ -35,18 +36,23 @@ class PostStates(StateGraph):
     edited_fanned_out.transitions_to(deleted)
 
     @classmethod
-    async def handle_new(cls, instance: "Post"):
-        """
-        Creates all needed fan-out objects for a new Post.
-        """
-        post = await instance.afetch_full()
+    async def targets_fan_out(cls, post: "Post", type_: str) -> None:
         # Fan out to each target
         for follow in await post.aget_targets():
             await FanOut.objects.acreate(
                 identity=follow,
-                type=FanOut.Types.post,
+                type=type_,
                 subject_post=post,
             )
+
+    @classmethod
+    async def handle_new(cls, instance: "Post"):
+        """
+        Creates all needed fan-out objects for a new Post.
+        """
+        post = await instance.afetch_full()
+        await cls.targets_fan_out(post, FanOut.Types.post)
+        await post.ensure_hashtags()
         return cls.fanned_out
 
     @classmethod
@@ -55,13 +61,7 @@ class PostStates(StateGraph):
         Creates all needed fan-out objects needed to delete a Post.
         """
         post = await instance.afetch_full()
-        # Fan out to each target
-        for follow in await post.aget_targets():
-            await FanOut.objects.acreate(
-                identity=follow,
-                type=FanOut.Types.post_deleted,
-                subject_post=post,
-            )
+        await cls.targets_fan_out(post, FanOut.Types.post_deleted)
         return cls.deleted_fanned_out
 
     @classmethod
@@ -70,16 +70,46 @@ class PostStates(StateGraph):
         Creates all needed fan-out objects for an edited Post.
         """
         post = await instance.afetch_full()
-        # Fan out to each target
-        for follow in await post.aget_targets():
-            await FanOut.objects.acreate(
-                identity=follow,
-                type=FanOut.Types.post_edited,
-                subject_post=post,
-            )
+        await cls.targets_fan_out(post, FanOut.Types.post_edited)
+        await post.ensure_hashtags()
         return cls.edited_fanned_out
 
 
+class PostQuerySet(models.QuerySet):
+    def local_public(self, include_replies: bool = False):
+        query = self.filter(
+            visibility__in=[
+                Post.Visibilities.public,
+                Post.Visibilities.local_only,
+            ],
+            author__local=True,
+        )
+        if not include_replies:
+            return query.filter(in_reply_to__isnull=True)
+        return query
+
+    def tagged_with(self, hashtag: str | Hashtag):
+        if isinstance(hashtag, str):
+            tag_q = models.Q(hashtags__contains=hashtag)
+        else:
+            tag_q = models.Q(hashtags__contains=hashtag.hashtag)
+            if hashtag.aliases:
+                for alias in hashtag.aliases:
+                    tag_q |= models.Q(hashtags__contains=alias)
+        return self.filter(tag_q)
+
+
+class PostManager(models.Manager):
+    def get_queryset(self):
+        return PostQuerySet(self.model, using=self._db)
+
+    def local_public(self, include_replies: bool = False):
+        return self.get_queryset().local_public(include_replies=include_replies)
+
+    def tagged_with(self, hashtag: str | Hashtag):
+        return self.get_queryset().tagged_with(hashtag=hashtag)
+
+
 class Post(StatorModel):
     """
     A post (status, toot) that is either local or remote.
@@ -155,6 +185,8 @@ class Post(StatorModel):
     created = models.DateTimeField(auto_now_add=True)
     updated = models.DateTimeField(auto_now=True)
 
+    objects = PostManager()
+
     class urls(urlman.Urls):
         view = "{self.author.urls.view}posts/{self.id}/"
         object_uri = "{self.author.actor_uri}posts/{self.id}/"
@@ -236,7 +268,9 @@ class Post(StatorModel):
         """
         Returns the content formatted for local display
         """
-        return self.linkify_mentions(sanitize_post(self.content), local=True)
+        return Hashtag.linkify_hashtags(
+            self.linkify_mentions(sanitize_post(self.content), local=True)
+        )
 
     def safe_content_remote(self):
         """
@@ -252,7 +286,7 @@ class Post(StatorModel):
 
     ### Async helpers ###
 
-    async def afetch_full(self):
+    async def afetch_full(self) -> "Post":
         """
         Returns a version of the object with all relations pre-loaded
         """
@@ -281,6 +315,8 @@ class Post(StatorModel):
                 # Maintain local-only for replies
                 if reply_to.visibility == reply_to.Visibilities.local_only:
                     visibility = reply_to.Visibilities.local_only
+            # Find hashtags in this post
+            hashtags = Hashtag.hashtags_from_content(content) or None
             # Strip all HTML and apply linebreaks filter
             content = linebreaks_filter(strip_html(content))
             # Make the Post object
@@ -291,6 +327,7 @@ class Post(StatorModel):
                 sensitive=bool(summary),
                 local=True,
                 visibility=visibility,
+                hashtags=hashtags,
                 in_reply_to=reply_to.object_uri if reply_to else None,
             )
             post.object_uri = post.urls.object_uri
@@ -312,6 +349,7 @@ class Post(StatorModel):
             self.sensitive = bool(summary)
             self.visibility = visibility
             self.edited = timezone.now()
+            self.hashtags = Hashtag.hashtags_from_content(content) or None
             self.mentions.set(self.mentions_from_content(content, self.author))
             self.save()
 
@@ -334,6 +372,18 @@ class Post(StatorModel):
                 mentions.add(identity)
         return mentions
 
+    async def ensure_hashtags(self) -> None:
+        """
+        Ensure any of the already parsed hashtags from this Post
+        have a corresponding Hashtag record.
+        """
+        # Ensure hashtags
+        if self.hashtags:
+            for hashtag in self.hashtags:
+                await Hashtag.objects.aget_or_create(
+                    hashtag=hashtag,
+                )
+
     ### ActivityPub (outbound) ###
 
     def to_ap(self) -> Dict:
-- 
cgit v1.2.3