summaryrefslogtreecommitdiffstats
path: root/activities/models
diff options
context:
space:
mode:
authorMichael Manfre2022-11-28 23:41:36 -0500
committerGitHub2022-11-28 21:41:36 -0700
commitfb8f2d10984bcfa2585dc272b4c85d285b722792 (patch)
treefa9616f745c7e9e4b5cc5d1ca82d61512ee64f01 /activities/models
parent7f838433edde6a03d1b7f71da269f9756a3f91e9 (diff)
downloadtakahe-fb8f2d10984bcfa2585dc272b4c85d285b722792.tar.gz
takahe-fb8f2d10984bcfa2585dc272b4c85d285b722792.tar.bz2
takahe-fb8f2d10984bcfa2585dc272b4c85d285b722792.zip
Hashtags
Diffstat (limited to 'activities/models')
-rw-r--r--activities/models/__init__.py1
-rw-r--r--activities/models/hashtag.py187
-rw-r--r--activities/models/post.py94
3 files changed, 260 insertions, 22 deletions
diff --git a/activities/models/__init__.py b/activities/models/__init__.py
index 1ae3f4c..aa34c0f 100644
--- a/activities/models/__init__.py
+++ b/activities/models/__init__.py
@@ -1,4 +1,5 @@
from .fan_out import FanOut, FanOutStates # noqa
+from .hashtag import Hashtag, HashtagStates # noqa
from .post import Post, PostStates # noqa
from .post_attachment import PostAttachment, PostAttachmentStates # noqa
from .post_interaction import PostInteraction, PostInteractionStates # noqa
diff --git a/activities/models/hashtag.py b/activities/models/hashtag.py
new file mode 100644
index 0000000..a5754f7
--- /dev/null
+++ b/activities/models/hashtag.py
@@ -0,0 +1,187 @@
+import re
+from datetime import date, timedelta
+from typing import Dict, List
+
+import urlman
+from asgiref.sync import sync_to_async
+from django.db import models
+from django.utils import timezone
+from django.utils.safestring import mark_safe
+
+from core.models import Config
+from stator.models import State, StateField, StateGraph, StatorModel
+
+
class HashtagStates(StateGraph):
    """
    State graph for Hashtag: cycles between `outdated` and `updated`.
    """

    outdated = State(try_interval=300, force_initial=True)
    updated = State(try_interval=3600, attempt_immediately=False)

    outdated.transitions_to(updated)
    updated.transitions_to(outdated)

    @classmethod
    async def handle_outdated(cls, instance: "Hashtag"):
        """
        Recounts the local public posts tagged with this Hashtag and stores
        the totals (overall, today, this month, this year) in its stats.

        Always moves the Hashtag on to `updated`; stats are only written
        when at least one tagged post exists.
        """
        # Imported inside the handler rather than at module level
        from .post import Post

        tagged_posts = Post.objects.local_public().tagged_with(instance)
        overall_count = await tagged_posts.acount()

        current_day = timezone.now().date()
        next_day = current_day + timedelta(days=1)
        # TODO: single query
        day_count = await tagged_posts.filter(
            created__gte=current_day,
            created__lte=next_day,
        ).acount()
        month_count = await tagged_posts.filter(
            created__year=current_day.year,
            created__month=current_day.month,
        ).acount()
        year_count = await tagged_posts.filter(
            created__year=current_day.year,
        ).acount()

        if overall_count:
            # Keys: "total", "YYYY-MM-DD", "YYYY-MM" and "YYYY"
            fresh_stats = {
                "total": overall_count,
                current_day.isoformat(): day_count,
                current_day.strftime("%Y-%m"): month_count,
                current_day.strftime("%Y"): year_count,
            }
            if not instance.stats:
                instance.stats = {}
            instance.stats.update(fresh_stats)
            instance.stats_updated = timezone.now()
            await sync_to_async(instance.save)()

        return cls.updated

    @classmethod
    async def handle_updated(cls, instance: "Hashtag"):
        """
        Sends the Hashtag back to `outdated` once its time in this state
        exceeds the configured maximum stats age; otherwise stays put.
        """
        stats_expired = instance.state_age > Config.system.hashtag_stats_max_age
        if stats_expired:
            return cls.outdated
+
+
class HashtagQuerySet(models.QuerySet):
    """
    QuerySet with Hashtag-specific filters.
    """

    def public(self):
        """
        Hashtags flagged public, plus unreviewed ones (public is NULL) when
        the system config treats unreviewed hashtags as public.
        """
        visible = models.Q(public=True)
        if Config.system.hashtag_unreviewed_are_public:
            visible = visible | models.Q(public__isnull=True)
        return self.filter(visible)

    def hashtag_or_alias(self, hashtag: str):
        """
        Hashtags whose primary name equals `hashtag` or whose aliases
        contain it.
        """
        name_matches = models.Q(hashtag=hashtag)
        alias_matches = models.Q(aliases__contains=hashtag)
        return self.filter(name_matches | alias_matches)
+
+
class HashtagManager(models.Manager):
    """
    Manager that hands out HashtagQuerySet and mirrors its filters.
    """

    def get_queryset(self):
        return HashtagQuerySet(model=self.model, using=self._db)

    def public(self):
        """
        Shortcut for `get_queryset().public()`.
        """
        queryset = self.get_queryset()
        return queryset.public()

    def hashtag_or_alias(self, hashtag: str):
        """
        Shortcut for `get_queryset().hashtag_or_alias(hashtag)`.
        """
        queryset = self.get_queryset()
        return queryset.hashtag_or_alias(hashtag)
+
+
class Hashtag(StatorModel):
    """
    A hashtag known to this server, with review status, usage stats and
    optional aliases.
    """

    # Normalized hashtag without the '#'
    hashtag = models.SlugField(primary_key=True, max_length=100)

    # Friendly display override
    name_override = models.CharField(max_length=100, null=True, blank=True)

    # Should this be shown in the public UI?
    public = models.BooleanField(null=True)

    # State of this Hashtag
    state = StateField(HashtagStates)

    # Metrics for this Hashtag
    stats = models.JSONField(null=True, blank=True)
    # Timestamp of last time the stats were updated
    stats_updated = models.DateTimeField(null=True, blank=True)

    # List of other hashtags that are considered similar
    aliases = models.JSONField(null=True, blank=True)

    created = models.DateTimeField(auto_now_add=True)
    updated = models.DateTimeField(auto_now=True)

    objects = HashtagManager()

    class urls(urlman.Urls):
        root = "/admin/hashtags/"
        create = "/admin/hashtags/create/"
        edit = "/admin/hashtags/{self.hashtag}/"
        delete = "{edit}delete/"
        timeline = "/tags/{self.hashtag}/"

    # Group 1 is the hashtag including '#', group 2 is the bare tag.
    # - The character class is limited to slug-safe word characters; literal
    #   parentheses used to be accepted, producing tags like "foo(bar".
    # - The (?<!&) lookbehind stops HTML numeric character references such
    #   as "&#39;" from being treated as the hashtag "#39".
    hashtag_regex = re.compile(r"((?:(?<!&)\B#)([a-zA-Z0-9_]{1,}\b))")

    def save(self, *args, **kwargs):
        """
        Strips any leading '#' from the hashtag and the name override
        before saving.
        """
        self.hashtag = self.hashtag.lstrip("#")
        if self.name_override:
            self.name_override = self.name_override.lstrip("#")
        return super().save(*args, **kwargs)

    @property
    def display_name(self):
        """
        The name override when set, otherwise the hashtag itself.
        """
        return self.name_override or self.hashtag

    def __str__(self):
        return self.display_name

    def _usage(self, part_count: int, num: int) -> Dict[date, int]:
        """
        Collects stats entries whose key has exactly part_count dash-separated
        numeric parts (2 = "YYYY-MM", 3 = "YYYY-MM-DD") and returns the num
        most recent, newest first.
        """
        if not self.stats:
            return {}
        results = {}
        for key, val in self.stats.items():
            parts = key.split("-")
            if len(parts) != part_count:
                continue
            try:
                year, month, *rest = (int(part) for part in parts)
                # Month keys carry no day component, so pin them to the 1st
                period = date(year, month, rest[0] if rest else 1)
            except ValueError:
                # Not a date-shaped key; ignore it rather than fail the caller
                continue
            results[period] = val
        return dict(sorted(results.items(), reverse=True)[:num])

    def usage_months(self, num: int = 12) -> Dict[date, int]:
        """
        Return the most recent num months of stats
        """
        return self._usage(2, num)

    def usage_days(self, num: int = 7) -> Dict[date, int]:
        """
        Return the most recent num days of stats
        """
        return self._usage(3, num)

    @classmethod
    def hashtags_from_content(cls, content) -> List[str]:
        """
        Return a sorted, de-duplicated list of the lowercased hashtags found
        in content, without the leading '#'.
        """
        hashtag_hits = cls.hashtag_regex.findall(content)
        # findall yields (full_match, bare_tag) tuples for the two groups
        return sorted({tag[1].lower() for tag in hashtag_hits})

    @classmethod
    def linkify_hashtags(cls, content) -> str:
        """
        Wrap every hashtag in content in a link to its /tags/ timeline and
        return the result marked safe.

        The content is marked safe as-is, so callers must pass content that
        has already been sanitized.
        """

        def replacer(match):
            hashtag = match.group()
            return f'<a class="hashtag" href="/tags/{hashtag.lstrip("#").lower()}/">{hashtag}</a>'

        return mark_safe(cls.hashtag_regex.sub(replacer, content))
diff --git a/activities/models/post.py b/activities/models/post.py
index f504fcb..b61abd4 100644
--- a/activities/models/post.py
+++ b/activities/models/post.py
@@ -10,6 +10,7 @@ from django.utils import timezone
from django.utils.safestring import mark_safe
from activities.models.fan_out import FanOut
+from activities.models.hashtag import Hashtag
from core.html import sanitize_post, strip_html
from core.ld import canonicalise, format_ld_date, get_list, parse_ld_date
from stator.models import State, StateField, StateGraph, StatorModel
@@ -35,18 +36,23 @@ class PostStates(StateGraph):
edited_fanned_out.transitions_to(deleted)
@classmethod
- async def handle_new(cls, instance: "Post"):
- """
- Creates all needed fan-out objects for a new Post.
- """
- post = await instance.afetch_full()
+ async def targets_fan_out(cls, post: "Post", type_: str) -> None:
# Fan out to each target
for follow in await post.aget_targets():
await FanOut.objects.acreate(
identity=follow,
- type=FanOut.Types.post,
+ type=type_,
subject_post=post,
)
+
+ @classmethod
+ async def handle_new(cls, instance: "Post"):
+ """
+ Creates all needed fan-out objects for a new Post.
+ """
+ post = await instance.afetch_full()
+ await cls.targets_fan_out(post, FanOut.Types.post)
+ await post.ensure_hashtags()
return cls.fanned_out
@classmethod
@@ -55,13 +61,7 @@ class PostStates(StateGraph):
Creates all needed fan-out objects needed to delete a Post.
"""
post = await instance.afetch_full()
- # Fan out to each target
- for follow in await post.aget_targets():
- await FanOut.objects.acreate(
- identity=follow,
- type=FanOut.Types.post_deleted,
- subject_post=post,
- )
+ await cls.targets_fan_out(post, FanOut.Types.post_deleted)
return cls.deleted_fanned_out
@classmethod
@@ -70,16 +70,46 @@ class PostStates(StateGraph):
Creates all needed fan-out objects for an edited Post.
"""
post = await instance.afetch_full()
- # Fan out to each target
- for follow in await post.aget_targets():
- await FanOut.objects.acreate(
- identity=follow,
- type=FanOut.Types.post_edited,
- subject_post=post,
- )
+ await cls.targets_fan_out(post, FanOut.Types.post_edited)
+ await post.ensure_hashtags()
return cls.edited_fanned_out
class PostQuerySet(models.QuerySet):
    """
    QuerySet with Post-specific filters.
    """

    def local_public(self, include_replies: bool = False):
        """
        Posts by local authors with public or local-only visibility.
        Replies are left out unless include_replies is set.
        """
        shown_visibilities = [
            Post.Visibilities.public,
            Post.Visibilities.local_only,
        ]
        posts = self.filter(
            visibility__in=shown_visibilities,
            author__local=True,
        )
        if include_replies:
            return posts
        return posts.filter(in_reply_to__isnull=True)

    def tagged_with(self, hashtag: str | Hashtag):
        """
        Posts whose hashtags contain the given tag. For a Hashtag object,
        posts tagged with any of its aliases match as well.
        """
        if isinstance(hashtag, str):
            return self.filter(models.Q(hashtags__contains=hashtag))

        names = [hashtag.hashtag]
        if hashtag.aliases:
            names.extend(hashtag.aliases)

        # OR together one containment check per name
        condition = models.Q(hashtags__contains=names[0])
        for name in names[1:]:
            condition = condition | models.Q(hashtags__contains=name)
        return self.filter(condition)
+
+
class PostManager(models.Manager):
    """
    Manager that hands out PostQuerySet and mirrors its filters.
    """

    def get_queryset(self):
        return PostQuerySet(model=self.model, using=self._db)

    def local_public(self, include_replies: bool = False):
        """
        Shortcut for `get_queryset().local_public(...)`.
        """
        queryset = self.get_queryset()
        return queryset.local_public(include_replies=include_replies)

    def tagged_with(self, hashtag: str | Hashtag):
        """
        Shortcut for `get_queryset().tagged_with(...)`.
        """
        queryset = self.get_queryset()
        return queryset.tagged_with(hashtag=hashtag)
+
+
class Post(StatorModel):
"""
A post (status, toot) that is either local or remote.
@@ -155,6 +185,8 @@ class Post(StatorModel):
created = models.DateTimeField(auto_now_add=True)
updated = models.DateTimeField(auto_now=True)
+ objects = PostManager()
+
class urls(urlman.Urls):
view = "{self.author.urls.view}posts/{self.id}/"
object_uri = "{self.author.actor_uri}posts/{self.id}/"
@@ -236,7 +268,9 @@ class Post(StatorModel):
"""
Returns the content formatted for local display
"""
- return self.linkify_mentions(sanitize_post(self.content), local=True)
+ return Hashtag.linkify_hashtags(
+ self.linkify_mentions(sanitize_post(self.content), local=True)
+ )
def safe_content_remote(self):
"""
@@ -252,7 +286,7 @@ class Post(StatorModel):
### Async helpers ###
- async def afetch_full(self):
+ async def afetch_full(self) -> "Post":
"""
Returns a version of the object with all relations pre-loaded
"""
@@ -281,6 +315,8 @@ class Post(StatorModel):
# Maintain local-only for replies
if reply_to.visibility == reply_to.Visibilities.local_only:
visibility = reply_to.Visibilities.local_only
+ # Find hashtags in this post
+ hashtags = Hashtag.hashtags_from_content(content) or None
# Strip all HTML and apply linebreaks filter
content = linebreaks_filter(strip_html(content))
# Make the Post object
@@ -291,6 +327,7 @@ class Post(StatorModel):
sensitive=bool(summary),
local=True,
visibility=visibility,
+ hashtags=hashtags,
in_reply_to=reply_to.object_uri if reply_to else None,
)
post.object_uri = post.urls.object_uri
@@ -312,6 +349,7 @@ class Post(StatorModel):
self.sensitive = bool(summary)
self.visibility = visibility
self.edited = timezone.now()
+ self.hashtags = Hashtag.hashtags_from_content(content) or None
self.mentions.set(self.mentions_from_content(content, self.author))
self.save()
@@ -334,6 +372,18 @@ class Post(StatorModel):
mentions.add(identity)
return mentions
async def ensure_hashtags(self) -> None:
    """
    Make sure every hashtag already parsed onto this Post has a
    matching Hashtag record, creating the missing ones.
    """
    # Nothing parsed, nothing to ensure
    if not self.hashtags:
        return
    for tag_name in self.hashtags:
        await Hashtag.objects.aget_or_create(hashtag=tag_name)
+
### ActivityPub (outbound) ###
def to_ap(self) -> Dict: