diff options
Diffstat (limited to 'activities/models')
| -rw-r--r-- | activities/models/__init__.py | 1 | ||||
| -rw-r--r-- | activities/models/hashtag.py | 187 | ||||
| -rw-r--r-- | activities/models/post.py | 94 | 
3 files changed, 260 insertions, 22 deletions
diff --git a/activities/models/__init__.py b/activities/models/__init__.py index 1ae3f4c..aa34c0f 100644 --- a/activities/models/__init__.py +++ b/activities/models/__init__.py @@ -1,4 +1,5 @@  from .fan_out import FanOut, FanOutStates  # noqa +from .hashtag import Hashtag, HashtagStates  # noqa  from .post import Post, PostStates  # noqa  from .post_attachment import PostAttachment, PostAttachmentStates  # noqa  from .post_interaction import PostInteraction, PostInteractionStates  # noqa diff --git a/activities/models/hashtag.py b/activities/models/hashtag.py new file mode 100644 index 0000000..a5754f7 --- /dev/null +++ b/activities/models/hashtag.py @@ -0,0 +1,187 @@ +import re +from datetime import date, timedelta +from typing import Dict, List + +import urlman +from asgiref.sync import sync_to_async +from django.db import models +from django.utils import timezone +from django.utils.safestring import mark_safe + +from core.models import Config +from stator.models import State, StateField, StateGraph, StatorModel + + +class HashtagStates(StateGraph): +    outdated = State(try_interval=300, force_initial=True) +    updated = State(try_interval=3600, attempt_immediately=False) + +    outdated.transitions_to(updated) +    updated.transitions_to(outdated) + +    @classmethod +    async def handle_outdated(cls, instance: "Hashtag"): +        """ +        Computes the stats and other things for a Hashtag +        """ +        from .post import Post + +        posts_query = Post.objects.local_public().tagged_with(instance) +        total = await posts_query.acount() + +        today = timezone.now().date() +        # TODO: single query +        total_today = await posts_query.filter( +            created__gte=today, +            created__lte=today + timedelta(days=1), +        ).acount() +        total_month = await posts_query.filter( +            created__year=today.year, +            created__month=today.month, +        ).acount() +        total_year = await posts_query.filter( +            created__year=today.year, +        ).acount() +        if total: +            if not instance.stats: +                instance.stats = {} +            instance.stats.update( +                { +                    "total": total, +                    today.isoformat(): total_today, +                    today.strftime("%Y-%m"): total_month, +                    today.strftime("%Y"): total_year, +                } +            ) +            instance.stats_updated = timezone.now() +            await sync_to_async(instance.save)() + +        return cls.updated + +    @classmethod +    async def handle_updated(cls, instance: "Hashtag"): +        if instance.state_age > Config.system.hashtag_stats_max_age: +            return cls.outdated + + +class HashtagQuerySet(models.QuerySet): +    def public(self): +        public_q = models.Q(public=True) +        if Config.system.hashtag_unreviewed_are_public: +            public_q |= models.Q(public__isnull=True) +        return self.filter(public_q) + +    def hashtag_or_alias(self, hashtag: str): +        return self.filter( +            models.Q(hashtag=hashtag) | models.Q(aliases__contains=hashtag) +        ) + + +class HashtagManager(models.Manager): +    def get_queryset(self): +        return HashtagQuerySet(self.model, using=self._db) + +    def public(self): +        return self.get_queryset().public() + +    def hashtag_or_alias(self, hashtag: str): +        return self.get_queryset().hashtag_or_alias(hashtag) + + +class Hashtag(StatorModel): + +    # Normalized hashtag without the '#' +    hashtag = models.SlugField(primary_key=True, max_length=100) + +    # Friendly display override +    name_override = models.CharField(max_length=100, null=True, blank=True) + +    # Should this be shown in the public UI? +    public = models.BooleanField(null=True) + +    # State of this Hashtag +    state = StateField(HashtagStates) + +    # Metrics for this Hashtag +    stats = models.JSONField(null=True, blank=True) +    # Timestamp of last time the stats were updated +    stats_updated = models.DateTimeField(null=True, blank=True) + +    # List of other hashtags that are considered similar +    aliases = models.JSONField(null=True, blank=True) + +    created = models.DateTimeField(auto_now_add=True) +    updated = models.DateTimeField(auto_now=True) + +    objects = HashtagManager() + +    class urls(urlman.Urls): +        root = "/admin/hashtags/" +        create = "/admin/hashtags/create/" +        edit = "/admin/hashtags/{self.hashtag}/" +        delete = "{edit}delete/" +        timeline = "/tags/{self.hashtag}/" + +    hashtag_regex = re.compile(r"((?:\B#)([a-zA-Z0-9(_)]{1,}\b))") + +    def save(self, *args, **kwargs): +        self.hashtag = self.hashtag.lstrip("#") +        if self.name_override: +            self.name_override = self.name_override.lstrip("#") +        return super().save(*args, **kwargs) + +    @property +    def display_name(self): +        return self.name_override or self.hashtag + +    def __str__(self): +        return self.display_name + +    def usage_months(self, num: int = 12) -> Dict[date, int]: +        """ +        Return the most recent num months of stats +        """ +        if not self.stats: +            return {} +        results = {} +        for key, val in self.stats.items(): +            parts = key.split("-") +            if len(parts) == 2: +                year = int(parts[0]) +                month = int(parts[1]) +                results[date(year, month, 1)] = val +        return dict(sorted(results.items(), reverse=True)[:num]) + +    def usage_days(self, num: int = 7) -> Dict[date, int]: +        """ +        Return the most recent num days of stats +        """ +        if not self.stats: +            return {} +        results = {} +        for key, val in self.stats.items(): +            parts = key.split("-") +            if len(parts) == 3: +                year = int(parts[0]) +                month = int(parts[1]) +                day = int(parts[2]) +                results[date(year, month, day)] = val +        return dict(sorted(results.items(), reverse=True)[:num]) + +    @classmethod +    def hashtags_from_content(cls, content) -> List[str]: +        """ +        Return a parsed and sanitized of hashtags found in content without +        leading '#'. +        """ +        hashtag_hits = cls.hashtag_regex.findall(content) +        hashtags = sorted({tag[1].lower() for tag in hashtag_hits}) +        return list(hashtags) + +    @classmethod +    def linkify_hashtags(cls, content) -> str: +        def replacer(match): +            hashtag = match.group() +            return f'<a class="hashtag" href="/tags/{hashtag.lstrip("#").lower()}/">{hashtag}</a>' + +        return mark_safe(Hashtag.hashtag_regex.sub(replacer, content)) diff --git a/activities/models/post.py b/activities/models/post.py index f504fcb..b61abd4 100644 --- a/activities/models/post.py +++ b/activities/models/post.py @@ -10,6 +10,7 @@ from django.utils import timezone  from django.utils.safestring import mark_safe  from activities.models.fan_out import FanOut +from activities.models.hashtag import Hashtag  from core.html import sanitize_post, strip_html  from core.ld import canonicalise, format_ld_date, get_list, parse_ld_date  from stator.models import State, StateField, StateGraph, StatorModel @@ -35,18 +36,23 @@ class PostStates(StateGraph):      edited_fanned_out.transitions_to(deleted)      @classmethod -    async def handle_new(cls, instance: "Post"): -        """ -        Creates all needed fan-out objects for a new Post. -        """ -        post = await instance.afetch_full() +    async def targets_fan_out(cls, post: "Post", type_: str) -> None:          # Fan out to each target          for follow in await post.aget_targets():              await FanOut.objects.acreate(                  identity=follow, -                type=FanOut.Types.post, +                type=type_,                  subject_post=post,              ) + +    @classmethod +    async def handle_new(cls, instance: "Post"): +        """ +        Creates all needed fan-out objects for a new Post. +        """ +        post = await instance.afetch_full() +        await cls.targets_fan_out(post, FanOut.Types.post) +        await post.ensure_hashtags()          return cls.fanned_out      @classmethod @@ -55,13 +61,7 @@ class PostStates(StateGraph):          Creates all needed fan-out objects needed to delete a Post.          """          post = await instance.afetch_full() -        # Fan out to each target -        for follow in await post.aget_targets(): -            await FanOut.objects.acreate( -                identity=follow, -                type=FanOut.Types.post_deleted, -                subject_post=post, -            ) +        await cls.targets_fan_out(post, FanOut.Types.post_deleted)          return cls.deleted_fanned_out      @classmethod @@ -70,16 +70,46 @@ class PostStates(StateGraph):          Creates all needed fan-out objects for an edited Post.          """          post = await instance.afetch_full() -        # Fan out to each target -        for follow in await post.aget_targets(): -            await FanOut.objects.acreate( -                identity=follow, -                type=FanOut.Types.post_edited, -                subject_post=post, -            ) +        await cls.targets_fan_out(post, FanOut.Types.post_edited) +        await post.ensure_hashtags()          return cls.edited_fanned_out +class PostQuerySet(models.QuerySet): +    def local_public(self, include_replies: bool = False): +        query = self.filter( +            visibility__in=[ +                Post.Visibilities.public, +                Post.Visibilities.local_only, +            ], +            author__local=True, +        ) +        if not include_replies: +            return query.filter(in_reply_to__isnull=True) +        return query + +    def tagged_with(self, hashtag: str | Hashtag): +        if isinstance(hashtag, str): +            tag_q = models.Q(hashtags__contains=hashtag) +        else: +            tag_q = models.Q(hashtags__contains=hashtag.hashtag) +            if hashtag.aliases: +                for alias in hashtag.aliases: +                    tag_q |= models.Q(hashtags__contains=alias) +        return self.filter(tag_q) + + +class PostManager(models.Manager): +    def get_queryset(self): +        return PostQuerySet(self.model, using=self._db) + +    def local_public(self, include_replies: bool = False): +        return self.get_queryset().local_public(include_replies=include_replies) + +    def tagged_with(self, hashtag: str | Hashtag): +        return self.get_queryset().tagged_with(hashtag=hashtag) + +  class Post(StatorModel):      """      A post (status, toot) that is either local or remote. @@ -155,6 +185,8 @@ class Post(StatorModel):      created = models.DateTimeField(auto_now_add=True)      updated = models.DateTimeField(auto_now=True) +    objects = PostManager() +      class urls(urlman.Urls):          view = "{self.author.urls.view}posts/{self.id}/"          object_uri = "{self.author.actor_uri}posts/{self.id}/" @@ -236,7 +268,9 @@ class Post(StatorModel):          """          Returns the content formatted for local display          """ -        return self.linkify_mentions(sanitize_post(self.content), local=True) +        return Hashtag.linkify_hashtags( +            self.linkify_mentions(sanitize_post(self.content), local=True) +        )      def safe_content_remote(self):          """ @@ -252,7 +286,7 @@ class Post(StatorModel):      ### Async helpers ### -    async def afetch_full(self): +    async def afetch_full(self) -> "Post":          """          Returns a version of the object with all relations pre-loaded          """ @@ -281,6 +315,8 @@ class Post(StatorModel):                  # Maintain local-only for replies                  if reply_to.visibility == reply_to.Visibilities.local_only:                      visibility = reply_to.Visibilities.local_only +            # Find hashtags in this post +            hashtags = Hashtag.hashtags_from_content(content) or None              # Strip all HTML and apply linebreaks filter              content = linebreaks_filter(strip_html(content))              # Make the Post object @@ -291,6 +327,7 @@ class Post(StatorModel):                  sensitive=bool(summary),                  local=True,                  visibility=visibility, +                hashtags=hashtags,                  in_reply_to=reply_to.object_uri if reply_to else None,              )              post.object_uri = post.urls.object_uri @@ -312,6 +349,7 @@ class Post(StatorModel):              self.sensitive = bool(summary)              self.visibility = visibility              self.edited = timezone.now() +            self.hashtags = Hashtag.hashtags_from_content(content) or None              self.mentions.set(self.mentions_from_content(content, self.author))              self.save() @@ -334,6 +372,18 @@ class Post(StatorModel):                  mentions.add(identity)          return mentions +    async def ensure_hashtags(self) -> None: +        """ +        Ensure any of the already parsed hashtags from this Post +        have a corresponding Hashtag record. +        """ +        # Ensure hashtags +        if self.hashtags: +            for hashtag in self.hashtags: +                await Hashtag.objects.aget_or_create( +                    hashtag=hashtag, +                ) +      ### ActivityPub (outbound) ###      def to_ap(self) -> Dict:  | 
