From fb8f2d10984bcfa2585dc272b4c85d285b722792 Mon Sep 17 00:00:00 2001
From: Michael Manfre
Date: Mon, 28 Nov 2022 23:41:36 -0500
Subject: Hashtags

---
 activities/admin.py                      |  25 ++++-
 activities/migrations/0002_hashtag.py    |  51 +++++++++
 activities/models/__init__.py            |   1 +
 activities/models/hashtag.py             | 187 +++++++++++++++++++++++++++++++
 activities/models/post.py                |  94 ++++++++++++----
 activities/templatetags/activity_tags.py |  13 +++
 activities/views/admin/__init__.py       |   0
 activities/views/explore.py              |  26 +++++
 activities/views/search.py               |  43 +++++--
 activities/views/timelines.py            |  45 ++++++--
 10 files changed, 446 insertions(+), 39 deletions(-)
 create mode 100644 activities/migrations/0002_hashtag.py
 create mode 100644 activities/models/hashtag.py
 create mode 100644 activities/views/admin/__init__.py
 create mode 100644 activities/views/explore.py

(limited to 'activities')

diff --git a/activities/admin.py b/activities/admin.py
index 8e29d22..c4875ca 100644
--- a/activities/admin.py
+++ b/activities/admin.py
@@ -1,7 +1,9 @@
+from asgiref.sync import async_to_sync
 from django.contrib import admin
 
 from activities.models import (
     FanOut,
+    Hashtag,
     Post,
     PostAttachment,
     PostInteraction,
@@ -9,6 +11,20 @@ from activities.models import (
 )
 
 
+@admin.register(Hashtag)
+class HashtagAdmin(admin.ModelAdmin):
+    list_display = ["hashtag", "name_override", "state", "stats_updated", "created"]
+    # Timestamp fields are listed as read-only rather than editable.
+    readonly_fields = ["created", "updated", "stats_updated"]
+    # force_execution calls transition_perform("outdated") on each selected row.
+    actions = ["force_execution"]
+
+    @admin.action(description="Force Execution")
+    def force_execution(self, request, queryset):
+        for instance in queryset:
+            instance.transition_perform("outdated")
+
+
 class PostAttachmentInline(admin.StackedInline):
     model = PostAttachment
     extra = 0
@@ -18,7 +34,7 @@ class PostAttachmentInline(admin.StackedInline):
 class PostAdmin(admin.ModelAdmin):
     list_display = ["id", "state", "author", "created"]
     raw_id_fields = ["to", "mentions", "author"]
-    actions = ["force_fetch"]
+    actions = ["force_fetch", "reparse_hashtags"]
     search_fields = ["content"]
     inlines = [PostAttachmentInline]
     readonly_fields = ["created", "updated", "object_json"]
@@ -28,6 +44,13 @@ class PostAdmin(admin.ModelAdmin):
         for instance in queryset:
             instance.debug_fetch()
 
+    @admin.action(description="Reprocess content for hashtags")
+    def reparse_hashtags(self, request, queryset):
+        for instance in queryset:
+            instance.hashtags = Hashtag.hashtags_from_content(instance.content) or None
+            instance.save()
+            async_to_sync(instance.ensure_hashtags)()
+
     @admin.display(description="ActivityPub JSON")
     def object_json(self, instance):
         return instance.to_ap()
diff --git a/activities/migrations/0002_hashtag.py b/activities/migrations/0002_hashtag.py
new file mode 100644
index 0000000..468bd95
--- /dev/null
+++ b/activities/migrations/0002_hashtag.py
@@ -0,0 +1,51 @@
+# Generated by Django 4.1.3 on 2022-11-27 20:16
+
+from django.db import migrations, models
+
+import activities.models.hashtag
+import stator.models
+
+
+class Migration(migrations.Migration):
+    # Creates the Hashtag model; depends on the activities app's 0001_initial.
+    dependencies = [
+        ("activities", "0001_initial"),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name="Hashtag",
+            fields=[
+                ("state_ready", models.BooleanField(default=True)),
+                ("state_changed", models.DateTimeField(auto_now_add=True)),
+                ("state_attempted", models.DateTimeField(blank=True, null=True)),
+                ("state_locked_until", models.DateTimeField(blank=True, null=True)),
+                (
+                    "hashtag",
+                    models.SlugField(max_length=100, primary_key=True, serialize=False),
+                ),
+                (
+                    "name_override",
+                    models.CharField(blank=True, max_length=100, null=True),
+                ),
+                ("public", models.BooleanField(null=True)),
+                (
+                    "state",
+                    stator.models.StateField(
+                        choices=[("outdated", "outdated"), ("updated", "updated")],
+                        default="outdated",
+                        graph=activities.models.hashtag.HashtagStates,
+                        max_length=100,
+                    ),
+                ),
+                ("stats", models.JSONField(blank=True, null=True)),
+                ("stats_updated", models.DateTimeField(blank=True, null=True)),
+                ("aliases", models.JSONField(blank=True, null=True)),
+                ("created", models.DateTimeField(auto_now_add=True)),
+                ("updated", models.DateTimeField(auto_now=True)),
+            ],
+            options={
+                "abstract": False,
+            },
+        ),
+    ]
diff --git a/activities/models/__init__.py b/activities/models/__init__.py
index 1ae3f4c..aa34c0f 100644
--- a/activities/models/__init__.py
+++ b/activities/models/__init__.py
@@ -1,4 +1,5 @@
 from .fan_out import FanOut, FanOutStates  # noqa
+from .hashtag import Hashtag, HashtagStates  # noqa
 from .post import Post, PostStates  # noqa
 from .post_attachment import PostAttachment, PostAttachmentStates  # noqa
 from .post_interaction import PostInteraction, PostInteractionStates  # noqa
diff --git a/activities/models/hashtag.py b/activities/models/hashtag.py
new file mode 100644
index 0000000..a5754f7
--- /dev/null
+++ b/activities/models/hashtag.py
@@ -0,0 +1,187 @@
+import re
+from datetime import date, timedelta
+from typing import Dict, List
+
+import urlman
+from asgiref.sync import sync_to_async
+from django.db import models
+from django.utils import timezone
+from django.utils.safestring import mark_safe
+
+from core.models import Config
+from stator.models import State, StateField, StateGraph, StatorModel
+
+
+class HashtagStates(StateGraph):
+    outdated = State(try_interval=300, force_initial=True)
+    updated = State(try_interval=3600, attempt_immediately=False)
+
+    outdated.transitions_to(updated)
+    updated.transitions_to(outdated)
+
+    @classmethod
+    async def handle_outdated(cls, instance: "Hashtag"):
+        """
+        Recount the local public posts tagged with this Hashtag (or one of its
+        aliases) and store the totals in instance.stats under the keys "total",
+        today's ISO date, "YYYY-MM" and "YYYY". Always returns cls.updated.
+        """
+        # Imported here because activities.models.post imports this module
+        from .post import Post
+
+        posts_query = Post.objects.local_public().tagged_with(instance)
+        total = await posts_query.acount()
+        today = timezone.now().date()
+        # TODO: single query
+        total_today = await posts_query.filter(
+            created__gte=today,
+            # Exclusive bound, so midnight of the next day is not counted as today
+            created__lt=today + timedelta(days=1),
+        ).acount()
+        total_month = await posts_query.filter(
+            created__year=today.year,
+            created__month=today.month,
+        ).acount()
+        total_year = await posts_query.filter(created__year=today.year).acount()
+        # Stats are only written when at least one matching post exists
+        if total:
+            instance.stats = {
+                **(instance.stats or {}),
+                "total": total,
+                today.isoformat(): total_today,
+                today.strftime("%Y-%m"): total_month,
+                today.strftime("%Y"): total_year,
+            }
+            instance.stats_updated = timezone.now()
+            await sync_to_async(instance.save)()
+
+        return cls.updated
+
+    @classmethod
+    async def handle_updated(cls, instance: "Hashtag"):
+        # Back to "outdated" once state_age exceeds the configured stats max age
+        if instance.state_age > Config.system.hashtag_stats_max_age:
+            return cls.outdated
+
+
+class HashtagQuerySet(models.QuerySet):
+    def public(self):
+        # Rows with public=NULL are included only when the config flag is set
+        visible = models.Q(public=True)
+        if Config.system.hashtag_unreviewed_are_public:
+            visible = visible | models.Q(public__isnull=True)
+        return self.filter(visible)
+
+    def hashtag_or_alias(self, hashtag: str):
+        matches_name = models.Q(hashtag=hashtag)
+        return self.filter(matches_name | models.Q(aliases__contains=hashtag))
+
+
+class HashtagManager(models.Manager):
+    def get_queryset(self) -> HashtagQuerySet:
+        return HashtagQuerySet(self.model, using=self._db)
+
+    def public(self) -> HashtagQuerySet:
+        return self.get_queryset().public()
+
+    def hashtag_or_alias(self, hashtag: str) -> HashtagQuerySet:
+        return self.get_queryset().hashtag_or_alias(hashtag)
+
+
+class Hashtag(StatorModel):
+    """A hashtag found in posts, with usage stats maintained by HashtagStates."""
+
+    # Normalized hashtag without the '#'
+    hashtag = models.SlugField(primary_key=True, max_length=100)
+
+    # Friendly display override
+    name_override = models.CharField(max_length=100, null=True, blank=True)
+
+    # Should this be shown in the public UI?
+    public = models.BooleanField(null=True)
+
+    # State of this Hashtag
+    state = StateField(HashtagStates)
+
+    # Metrics for this Hashtag
+    stats = models.JSONField(null=True, blank=True)
+    # Timestamp of last time the stats were updated
+    stats_updated = models.DateTimeField(null=True, blank=True)
+
+    # List of other hashtags that are considered similar
+    aliases = models.JSONField(null=True, blank=True)
+
+    created = models.DateTimeField(auto_now_add=True)
+    updated = models.DateTimeField(auto_now=True)
+
+    objects = HashtagManager()
+
+    class urls(urlman.Urls):
+        root = "/admin/hashtags/"
+        create = "/admin/hashtags/create/"
+        edit = "/admin/hashtags/{self.hashtag}/"
+        delete = "{edit}delete/"
+        timeline = "/tags/{self.hashtag}/"
+
+    # Group 1 is "#tag", group 2 the bare tag. The (?<!&) guard skips HTML
+    # character references such as "&#x27;"; only letters, digits and "_" match.
+    hashtag_regex = re.compile(r"((?<!&)\B#([a-zA-Z0-9_]+)\b)")
+
+    def save(self, *args, **kwargs):
+        self.hashtag = self.hashtag.lstrip("#")
+        if self.name_override:
+            self.name_override = self.name_override.lstrip("#")
+        return super().save(*args, **kwargs)
+
+    @property
+    def display_name(self):
+        return self.name_override or self.hashtag
+
+    def __str__(self):
+        return self.display_name
+
+    def _usage(self, key_parts: int, num: int) -> Dict[date, int]:
+        """
+        Return the num most recent stats entries whose key has key_parts
+        dash-separated fields: 2 for "YYYY-MM" keys, 3 for "YYYY-MM-DD" keys.
+        """
+        results = {}
+        for key, val in (self.stats or {}).items():
+            parts = key.split("-")
+            if len(parts) == key_parts:
+                # Month keys have no day field, so they are dated the 1st
+                year, month, day = (*map(int, parts), 1)[:3]
+                results[date(year, month, day)] = val
+        return dict(sorted(results.items(), reverse=True)[:num])
+
+    def usage_months(self, num: int = 12) -> Dict[date, int]:
+        """
+        Return the most recent num months of stats
+        """
+        return self._usage(2, num)
+
+    def usage_days(self, num: int = 7) -> Dict[date, int]:
+        """
+        Return the most recent num days of stats
+        """
+        return self._usage(3, num)
+
+    @classmethod
+    def hashtags_from_content(cls, content) -> List[str]:
+        """
+        Return the sorted, de-duplicated, lowercased hashtags found in content,
+        without the leading '#'.
+        """
+        hashtag_hits = cls.hashtag_regex.findall(content)
+        return sorted({tag[1].lower() for tag in hashtag_hits})
+
+    @classmethod
+    def linkify_hashtags(cls, content) -> str:
+        """Link each hashtag in already-sanitized HTML content to /tags/<tag>/."""
+
+        def replacer(match):
+            hashtag = match.group()
+            tag = hashtag.lstrip("#").lower()
+            return f'<a class="hashtag" href="/tags/{tag}/">{hashtag}</a>'
+
+        return mark_safe(cls.hashtag_regex.sub(replacer, content))
diff --git a/activities/models/post.py b/activities/models/post.py
index f504fcb..b61abd4 100644
--- a/activities/models/post.py
+++ b/activities/models/post.py
@@ -10,6 +10,7 @@ from django.utils import timezone
 from django.utils.safestring import mark_safe
 
 from activities.models.fan_out import FanOut
+from activities.models.hashtag import Hashtag
 from core.html import sanitize_post, strip_html
 from core.ld import canonicalise, format_ld_date, get_list, parse_ld_date
 from stator.models import State, StateField, StateGraph, StatorModel
@@ -35,18 +36,23 @@ class PostStates(StateGraph):
     edited_fanned_out.transitions_to(deleted)
 
     @classmethod
-    async def handle_new(cls, instance: "Post"):
-        """
-        Creates all needed fan-out objects for a new Post.
-        """
-        post = await instance.afetch_full()
+    async def targets_fan_out(cls, post: "Post", type_: str) -> None:
         # Fan out to each target
         for follow in await post.aget_targets():
             await FanOut.objects.acreate(
                 identity=follow,
-                type=FanOut.Types.post,
+                type=type_,
                 subject_post=post,
             )
+
+    @classmethod
+    async def handle_new(cls, instance: "Post"):
+        """
+        Creates all needed fan-out objects for a new Post.
+        """
+        post = await instance.afetch_full()
+        await cls.targets_fan_out(post, FanOut.Types.post)
+        await post.ensure_hashtags()
         return cls.fanned_out
 
     @classmethod
@@ -55,13 +61,7 @@ class PostStates(StateGraph):
         Creates all needed fan-out objects needed to delete a Post.
         """
         post = await instance.afetch_full()
-        # Fan out to each target
-        for follow in await post.aget_targets():
-            await FanOut.objects.acreate(
-                identity=follow,
-                type=FanOut.Types.post_deleted,
-                subject_post=post,
-            )
+        await cls.targets_fan_out(post, FanOut.Types.post_deleted)
         return cls.deleted_fanned_out
 
     @classmethod
@@ -70,16 +70,46 @@ class PostStates(StateGraph):
         Creates all needed fan-out objects for an edited Post.
         """
         post = await instance.afetch_full()
-        # Fan out to each target
-        for follow in await post.aget_targets():
-            await FanOut.objects.acreate(
-                identity=follow,
-                type=FanOut.Types.post_edited,
-                subject_post=post,
-            )
+        await cls.targets_fan_out(post, FanOut.Types.post_edited)
+        await post.ensure_hashtags()
         return cls.edited_fanned_out
 
 
+class PostQuerySet(models.QuerySet):
+    def local_public(self, include_replies: bool = False):
+        """
+        Posts by local authors whose visibility is public or local-only;
+        replies are dropped unless include_replies is set.
+        """
+        visible = [Post.Visibilities.public, Post.Visibilities.local_only]
+        posts = self.filter(visibility__in=visible, author__local=True)
+        if include_replies:
+            return posts
+        return posts.filter(in_reply_to__isnull=True)
+
+    def tagged_with(self, hashtag: str | Hashtag):
+        # A plain string matches only itself; a Hashtag also matches its aliases
+        if isinstance(hashtag, str):
+            names = [hashtag]
+        else:
+            names = [hashtag.hashtag, *(hashtag.aliases or [])]
+        tag_q = models.Q(hashtags__contains=names[0])
+        for name in names[1:]:
+            tag_q |= models.Q(hashtags__contains=name)
+        return self.filter(tag_q)
+
+
+class PostManager(models.Manager):
+    def get_queryset(self) -> PostQuerySet:
+        return PostQuerySet(self.model, using=self._db)
+
+    def local_public(self, include_replies: bool = False) -> PostQuerySet:
+        return self.get_queryset().local_public(include_replies=include_replies)
+
+    def tagged_with(self, hashtag: str | Hashtag) -> PostQuerySet:
+        return self.get_queryset().tagged_with(hashtag=hashtag)
+
+
 class Post(StatorModel):
     """
     A post (status, toot) that is either local or remote.
@@ -155,6 +185,8 @@ class Post(StatorModel):
     created = models.DateTimeField(auto_now_add=True)
     updated = models.DateTimeField(auto_now=True)
 
+    objects = PostManager()
+
     class urls(urlman.Urls):
         view = "{self.author.urls.view}posts/{self.id}/"
         object_uri = "{self.author.actor_uri}posts/{self.id}/"
@@ -236,7 +268,9 @@ class Post(StatorModel):
         """
         Returns the content formatted for local display
         """
-        return self.linkify_mentions(sanitize_post(self.content), local=True)
+        return Hashtag.linkify_hashtags(
+            self.linkify_mentions(sanitize_post(self.content), local=True)
+        )
 
     def safe_content_remote(self):
         """
@@ -252,7 +286,7 @@ class Post(StatorModel):
 
     ### Async helpers ###
 
-    async def afetch_full(self):
+    async def afetch_full(self) -> "Post":
         """
         Returns a version of the object with all relations pre-loaded
         """
@@ -281,6 +315,8 @@ class Post(StatorModel):
                 # Maintain local-only for replies
                 if reply_to.visibility == reply_to.Visibilities.local_only:
                     visibility = reply_to.Visibilities.local_only
+            # Find hashtags in this post
+            hashtags = Hashtag.hashtags_from_content(content) or None
             # Strip all HTML and apply linebreaks filter
             content = linebreaks_filter(strip_html(content))
             # Make the Post object
@@ -291,6 +327,7 @@ class Post(StatorModel):
                 sensitive=bool(summary),
                 local=True,
                 visibility=visibility,
+                hashtags=hashtags,
                 in_reply_to=reply_to.object_uri if reply_to else None,
             )
             post.object_uri = post.urls.object_uri
@@ -312,6 +349,7 @@ class Post(StatorModel):
             self.sensitive = bool(summary)
             self.visibility = visibility
             self.edited = timezone.now()
+            self.hashtags = Hashtag.hashtags_from_content(content) or None
             self.mentions.set(self.mentions_from_content(content, self.author))
             self.save()
 
@@ -334,6 +372,18 @@ class Post(StatorModel):
                 mentions.add(identity)
         return mentions
 
+    async def ensure_hashtags(self) -> None:
+        """
+        Create a Hashtag row for every hashtag already parsed onto this Post
+        that does not have one yet.
+        """
+        # self.hashtags may be None when the post has no tags
+        if not self.hashtags:
+            return
+        # One get-or-create per tag, awaited sequentially
+        for name in self.hashtags:
+            await Hashtag.objects.aget_or_create(hashtag=name)
+
     ### ActivityPub (outbound) ###
 
     def to_ap(self) -> Dict:
diff --git a/activities/templatetags/activity_tags.py b/activities/templatetags/activity_tags.py
index 571e2d6..fb822f6 100644
--- a/activities/templatetags/activity_tags.py
+++ b/activities/templatetags/activity_tags.py
@@ -3,6 +3,8 @@ import datetime
 from django import template
 from django.utils import timezone
 
+from activities.models import Hashtag
+
 register = template.Library()
 
 
@@ -31,3 +33,14 @@ def timedeltashort(value: datetime.datetime):
         years = max(days // 365.25, 1)
         text = f"{years:0n}y"
     return text
+
+
+@register.filter
+def linkify_hashtags(value: str):
+    """
+    Template filter that turns each hashtag in value into a link to
+    /tags/<hashtag>/; empty or missing input renders as "".
+    """
+    if value:
+        return Hashtag.linkify_hashtags(value)
+    return ""
diff --git a/activities/views/admin/__init__.py b/activities/views/admin/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/activities/views/explore.py b/activities/views/explore.py
new file mode 100644
index 0000000..ddb1e6c
--- /dev/null
+++ b/activities/views/explore.py
@@ -0,0 +1,26 @@
+from django.views.generic import ListView
+
+from activities.models import Hashtag
+
+
+class ExploreTag(ListView):
+    """
+    Explore page listing public hashtags ordered by their stats total.
+    """
+
+    template_name = "activities/explore_tag.html"
+    extra_context = {
+        "current_page": "explore",
+        "allows_refresh": True,
+    }
+    paginate_by = 20
+
+    def get_queryset(self):
+        # Only tags with a stats total above zero, highest total first, top 20
+        used = Hashtag.objects.public().filter(stats__total__gt=0)
+        ranked = used.order_by("-stats__total")
+        return ranked[:20]
+
+
+class Explore(ExploreTag):
+    """Adds nothing to ExploreTag; defined as a separate view class."""
diff --git a/activities/views/search.py b/activities/views/search.py
index b175052..4719f64 100644
--- a/activities/views/search.py
+++ b/activities/views/search.py
@@ -1,6 +1,9 @@
+from typing import Set
+
 from django import forms
 from django.views.generic import FormView
 
+from activities.models import Hashtag
 from users.models import Domain, Identity
 
 
@@ -9,13 +12,13 @@ class Search(FormView):
     template_name = "activities/search.html"
 
     class form_class(forms.Form):
-        query = forms.CharField(help_text="Search for a user by @username@domain")
-
-    def form_valid(self, form):
-        query = form.cleaned_data["query"].lstrip("@").lower()
-        results = {"identities": set()}
-        # Search identities
+        query = forms.CharField(
+            help_text="Search for a user by @username@domain or hashtag by #tagname"
+        )
 
+    def search_identities(self, query: str):
+        query = query.lstrip("@")
+        results: Set[Identity] = set()
         if "@" in query:
             username, domain = query.split("@", 1)
 
@@ -35,13 +38,35 @@ class Search(FormView):
                     )
                 identity = None
             if identity:
-                results["identities"].add(identity)
+                results.add(identity)
 
         else:
             for identity in Identity.objects.filter(username=query)[:20]:
-                results["identities"].add(identity)
+                results.add(identity)
             for identity in Identity.objects.filter(username__startswith=query)[:20]:
-                results["identities"].add(identity)
+                results.add(identity)
+        return results
+
+    def search_hashtags(self, query: str):
+        """
+        Return up to 10 public hashtags matching query exactly (by name or
+        alias) plus up to 10 whose name starts with it; queries containing
+        "@" return an empty set.
+        """
+        if "@" in query:
+            return set()
+        tag = query.lstrip("#")
+        public = Hashtag.objects.public()
+        exact = public.hashtag_or_alias(tag)[:10]
+        return {*exact, *public.filter(hashtag__startswith=tag)[:10]}
+
+    def form_valid(self, form):
+        query = form.cleaned_data["query"].lower()
+        results = {
+            "identities": self.search_identities(query),
+            "hashtags": self.search_hashtags(query),
+        }
+
         # Render results
         context = self.get_context_data(form=form)
         context["results"] = results
diff --git a/activities/views/timelines.py b/activities/views/timelines.py
index 4f2a515..ffe329c 100644
--- a/activities/views/timelines.py
+++ b/activities/views/timelines.py
@@ -1,10 +1,10 @@
 from django import forms
-from django.shortcuts import redirect
+from django.shortcuts import get_object_or_404, redirect
 from django.template.defaultfilters import linebreaks_filter
 from django.utils.decorators import method_decorator
 from django.views.generic import FormView, ListView
 
-from activities.models import Post, PostInteraction, TimelineEvent
+from activities.models import Hashtag, Post, PostInteraction, TimelineEvent
 from core.models import Config
 from users.decorators import identity_required
 
@@ -61,6 +61,41 @@ class Home(FormView):
         return redirect(".")
 
 
+class Tag(ListView):
+    """
+    Timeline of local public posts tagged with a single public hashtag.
+    """
+
+    template_name = "activities/tag.html"
+    extra_context = {
+        "current_page": "tag",
+        "allows_refresh": True,
+    }
+    paginate_by = 50
+
+    def get(self, request, hashtag, *args, **kwargs):
+        tag = hashtag.lower().lstrip("#")
+        if hashtag != tag:
+            # SEO sanitize: permanently redirect to the canonical lowercase URL
+            return redirect(f"/tags/{tag}/", permanent=True)
+        # 404 for unknown tags and for tags excluded by Hashtag.objects.public()
+        self.hashtag = get_object_or_404(Hashtag.objects.public(), hashtag=tag)
+        return super().get(request, *args, **kwargs)
+
+    def get_queryset(self):
+        posts = Post.objects.local_public().tagged_with(self.hashtag)
+        posts = posts.select_related("author").prefetch_related("attachments")
+        return posts.order_by("-created")[:50]
+
+    def get_context_data(self, **kwargs):
+        # Accept and forward kwargs like the ListView method being overridden
+        context = super().get_context_data(**kwargs)
+        context["hashtag"] = self.hashtag
+        page, identity = context["page_obj"], self.request.identity
+        context["interactions"] = PostInteraction.get_post_interactions(page, identity)
+        return context
+
+
 class Local(ListView):
 
     template_name = "activities/local.html"
@@ -72,11 +107,7 @@ class Local(ListView):
 
     def get_queryset(self):
         return (
-            Post.objects.filter(
-                visibility=Post.Visibilities.public,
-                author__local=True,
-                in_reply_to__isnull=True,
-            )
+            Post.objects.local_public()
             .select_related("author")
             .prefetch_related("attachments")
             .order_by("-created")[:50]
-- 
cgit v1.2.3