summaryrefslogtreecommitdiffstats
path: root/activities/models/hashtag.py
blob: a850ec3a655ed3e6fe16ad1dc3fb82bf35712562 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
import re
from datetime import date, timedelta

import urlman
from asgiref.sync import sync_to_async
from django.db import models
from django.utils import timezone
from django.utils.safestring import mark_safe

from core.html import strip_html
from core.models import Config
from stator.models import State, StateField, StateGraph, StatorModel


class HashtagStates(StateGraph):
    """
    State graph for Hashtag with two states, "outdated" and "updated",
    each of which may transition to the other.
    """

    outdated = State(try_interval=300, force_initial=True)
    updated = State(try_interval=3600, attempt_immediately=False)

    outdated.transitions_to(updated)
    updated.transitions_to(outdated)

    @classmethod
    async def handle_outdated(cls, instance: "Hashtag"):
        """
        Recounts the local public posts tagged with this Hashtag (overall,
        today, this month, this year), stores the counts in instance.stats
        when there is at least one post, and moves on to "updated".
        """
        # Imported here rather than at module level, as in the original
        # (presumably to avoid a circular import - confirm)
        from .post import Post

        tagged_posts = Post.objects.local_public().tagged_with(instance)
        overall_count = await tagged_posts.acount()

        current_day = timezone.now().date()
        day_window = {
            "created__gte": current_day,
            "created__lte": current_day + timedelta(days=1),
        }
        month_window = {
            "created__year": current_day.year,
            "created__month": current_day.month,
        }
        year_window = {"created__year": current_day.year}

        # TODO: single query
        day_count = await tagged_posts.filter(**day_window).acount()
        month_count = await tagged_posts.filter(**month_window).acount()
        year_count = await tagged_posts.filter(**year_window).acount()

        # Nothing is written for a hashtag with no matching posts
        if not overall_count:
            return cls.updated

        # Keys are "total", the ISO day, "YYYY-MM" and "YYYY"; entries
        # already stored under other keys are left in place
        fresh_counts = {
            "total": overall_count,
            current_day.isoformat(): day_count,
            current_day.strftime("%Y-%m"): month_count,
            current_day.strftime("%Y"): year_count,
        }
        if instance.stats:
            instance.stats.update(fresh_counts)
        else:
            instance.stats = fresh_counts
        instance.stats_updated = timezone.now()
        await sync_to_async(instance.save)()

        return cls.updated

    @classmethod
    async def handle_updated(cls, instance: "Hashtag"):
        """
        Sends the Hashtag back to "outdated" once its state age exceeds the
        configured hashtag_stats_max_age; otherwise returns None.
        """
        is_stale = instance.state_age > Config.system.hashtag_stats_max_age
        return cls.outdated if is_stale else None


class HashtagQuerySet(models.QuerySet):
    """
    QuerySet with the filters used to look up Hashtags.
    """

    def public(self):
        """
        Hashtags with public=True, plus those whose public flag is NULL when
        Config.system.hashtag_unreviewed_are_public is truthy.
        """
        reviewed_public = models.Q(public=True)
        if not Config.system.hashtag_unreviewed_are_public:
            return self.filter(reviewed_public)
        unreviewed = models.Q(public__isnull=True)
        return self.filter(reviewed_public | unreviewed)

    def hashtag_or_alias(self, hashtag: str):
        """
        Hashtags whose primary key equals the given value or whose aliases
        JSON contains it.
        """
        exact_match = models.Q(hashtag=hashtag)
        alias_match = models.Q(aliases__contains=hashtag)
        return self.filter(exact_match | alias_match)


class HashtagManager(models.Manager):
    """
    Manager that hands out HashtagQuerySet instances and exposes their
    filter helpers directly.
    """

    def get_queryset(self):
        """
        Build a HashtagQuerySet for this manager's model and database alias.
        """
        queryset = HashtagQuerySet(self.model, using=self._db)
        return queryset

    def public(self):
        """
        Shortcut for get_queryset().public().
        """
        queryset = self.get_queryset()
        return queryset.public()

    def hashtag_or_alias(self, hashtag: str):
        """
        Shortcut for get_queryset().hashtag_or_alias(hashtag).
        """
        queryset = self.get_queryset()
        return queryset.hashtag_or_alias(hashtag)


class Hashtag(StatorModel):
    """
    A hashtag, keyed by its text without the leading '#', together with
    its usage statistics, review flag and aliases.
    """

    # Normalized hashtag without the '#'
    hashtag = models.SlugField(primary_key=True, max_length=100)

    # Friendly display override
    name_override = models.CharField(max_length=100, null=True, blank=True)

    # Should this be shown in the public UI?
    public = models.BooleanField(null=True)

    # State of this Hashtag
    state = StateField(HashtagStates)

    # Metrics for this Hashtag
    stats = models.JSONField(null=True, blank=True)
    # Timestamp of last time the stats were updated
    stats_updated = models.DateTimeField(null=True, blank=True)

    # List of other hashtags that are considered similar
    aliases = models.JSONField(null=True, blank=True)

    created = models.DateTimeField(auto_now_add=True)
    updated = models.DateTimeField(auto_now=True)

    objects = HashtagManager()

    class urls(urlman.Urls):
        root = "/admin/hashtags/"
        create = "/admin/hashtags/create/"
        edit = "/admin/hashtags/{self.hashtag}/"
        delete = "{edit}delete/"
        timeline = "/tags/{self.hashtag}/"

    # Matches '#' followed by a run of tag characters, capturing the tag
    # without the '#'. The trailing (?!;) rejects matches directly followed
    # by ';' (presumably to skip numeric HTML entities such as "&#39;").
    # NOTE(review): the character class also admits literal '(' and ')' -
    # confirm whether that is intended.
    hashtag_regex = re.compile(r"\B#([a-zA-Z0-9(_)]+\b)(?!;)")

    def save(self, *args, **kwargs):
        """
        Strip any leading '#' from the hashtag and the name override, then
        save as normal.
        """
        self.hashtag = self.hashtag.lstrip("#")
        if self.name_override:
            self.name_override = self.name_override.lstrip("#")
        return super().save(*args, **kwargs)

    @property
    def display_name(self):
        """
        The name override if one is set, otherwise the hashtag itself.
        """
        return self.name_override or self.hashtag

    def __str__(self):
        return self.display_name

    def _usage(self, parts_count: int, num: int) -> dict[date, int]:
        """
        Collect the stats entries whose key splits on '-' into exactly
        parts_count numeric parts (2 for "YYYY-MM", 3 for "YYYY-MM-DD") and
        return the num most recent of them, newest first, keyed by date.
        Month keys are mapped to the first day of that month.
        """
        if not self.stats:
            return {}
        results = {}
        for key, val in self.stats.items():
            parts = key.split("-")
            # Keys of any other shape ("total", "YYYY", ...) are skipped
            if len(parts) != parts_count:
                continue
            year, month, *rest = (int(part) for part in parts)
            day = rest[0] if rest else 1
            results[date(year, month, day)] = val
        return dict(sorted(results.items(), reverse=True)[:num])

    def usage_months(self, num: int = 12) -> dict[date, int]:
        """
        Return the most recent num months of stats
        """
        return self._usage(2, num)

    def usage_days(self, num: int = 7) -> dict[date, int]:
        """
        Return the most recent num days of stats
        """
        return self._usage(3, num)

    @classmethod
    def hashtags_from_content(cls, content) -> list[str]:
        """
        Return a sorted, de-duplicated list of the lowercased hashtags found
        in content (after HTML stripping), without the leading '#'.
        """
        hashtag_hits = cls.hashtag_regex.findall(strip_html(content))
        return sorted({tag.lower() for tag in hashtag_hits})

    @classmethod
    def linkify_hashtags(cls, content, domain=None) -> str:
        """
        Replace every hashtag in content with a link to its tag page and
        mark the result as safe.

        When domain is given, links are absolute and use its uri_domain;
        otherwise they are site-relative. The link target uses the
        lowercased tag while the link text keeps the original casing.

        NOTE(review): the whole result goes through mark_safe, so content
        is assumed to be already-sanitized HTML - confirm at call sites.
        """

        def replacer(match):
            hashtag = match.group(1)
            if domain:
                return f'<a class="hashtag" href="https://{domain.uri_domain}/tags/{hashtag.lower()}/">#{hashtag}</a>'
            else:
                return (
                    f'<a class="hashtag" href="/tags/{hashtag.lower()}/">#{hashtag}</a>'
                )

        return mark_safe(cls.hashtag_regex.sub(replacer, content))

    def to_mastodon_json(self):
        """
        Return a dict for this hashtag with "name", "url" and an empty
        "history" list.
        """
        return {
            "name": self.hashtag,
            # The urls class defines no "view" entry; "timeline" is the
            # public page for the tag, so link to that.
            "url": self.urls.timeline.full(),
            "history": [],
        }