summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrew Godwin2022-12-10 12:16:08 -0700
committerAndrew Godwin2022-12-10 12:16:08 -0700
commit3595af7bd239f3843aff3ae06df8932cff23173d (patch)
tree84b8a0432fb89f253808be11275e2f78fc57bf42
parent9a978786d4eac0139b5606e22c605450adbe7a12 (diff)
downloadtakahe-3595af7bd239f3843aff3ae06df8932cff23173d.tar.gz
takahe-3595af7bd239f3843aff3ae06df8932cff23173d.tar.bz2
takahe-3595af7bd239f3843aff3ae06df8932cff23173d.zip
Media proxy, caching and tuning docs
Fixes #67
-rw-r--r--.gitignore1
-rw-r--r--activities/models/post_attachment.py4
-rw-r--r--activities/views/posts.py2
-rw-r--r--docs/installation.rst9
-rw-r--r--docs/tuning.rst146
-rw-r--r--mediaproxy/__init__.py0
-rw-r--r--mediaproxy/apps.py6
-rw-r--r--mediaproxy/views.py101
-rw-r--r--takahe/settings.py13
-rw-r--r--takahe/urls.py17
-rw-r--r--users/models/identity.py4
-rw-r--r--users/views/identity.py2
12 files changed, 285 insertions, 20 deletions
diff --git a/.gitignore b/.gitignore
index 8308fed..bc256e2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,7 @@
.vscode
/*.env
/build
+/cache/
/docs/_build
/media/
/static-collected
diff --git a/activities/models/post_attachment.py b/activities/models/post_attachment.py
index 7feaba5..932ae65 100644
--- a/activities/models/post_attachment.py
+++ b/activities/models/post_attachment.py
@@ -77,13 +77,13 @@ class PostAttachment(StatorModel):
elif self.file:
return self.file.url
else:
- return self.remote_url
+ return f"/proxy/post_attachment/{self.pk}/"
def full_url(self):
if self.file:
return self.file.url
else:
- return self.remote_url
+ return f"/proxy/post_attachment/{self.pk}/"
### ActivityPub ###
diff --git a/activities/views/posts.py b/activities/views/posts.py
index d343567..ccc38fc 100644
--- a/activities/views/posts.py
+++ b/activities/views/posts.py
@@ -3,6 +3,7 @@ from django.db import models
from django.http import JsonResponse
from django.shortcuts import get_object_or_404, redirect, render
from django.utils.decorators import method_decorator
+from django.views.decorators.vary import vary_on_headers
from django.views.generic import TemplateView, View
from activities.models import Post, PostInteraction, PostInteractionStates, PostStates
@@ -15,6 +16,7 @@ from users.shortcuts import by_handle_or_404
@method_decorator(
cache_page_by_ap_json("cache_timeout_page_post", public_only=True), name="dispatch"
)
+@method_decorator(vary_on_headers("Accept"), name="dispatch")
class Individual(TemplateView):
template_name = "activities/post.html"
diff --git a/docs/installation.rst b/docs/installation.rst
index b268377..f8b8937 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -252,9 +252,8 @@ You should select the "Domains" link in the sidebar and create one, and then
you will be able to make your first identity.
-Scaling
--------
+Tuning and Scaling
+------------------
-You can run as many copies of the webserver and workers as you like; the main
-limitation will be your database server's processing power and number of
-allowed connections.
+See :doc:`/tuning` for all the things you should tweak as your server gains
+users. We recommend setting up caches early on!
diff --git a/docs/tuning.rst b/docs/tuning.rst
index 9e959ec..4fdc43b 100644
--- a/docs/tuning.rst
+++ b/docs/tuning.rst
@@ -5,6 +5,39 @@ This page contains a collection of tips and settings that can be used to
tune your server based upon its users and the other servers it federates
with.
+Scaling
+-------
+
+The only bottleneck, and single point of failure, in a Takahē installation is
+its database; no permanent state is stored elsewhere.
+
+Provided your database is happy (and PostgreSQL does a very good job of just
+using more resources if you give them to it), you can:
+
+* Run more webserver containers to handle a higher request load (requests
+ come from both users and other ActivityPub servers trying to forward you
+ messages). Consider setting up the DEFAULT cache under high request load, too.
+
+* Run more Stator worker containers to handle a higher processing load (Stator
+ handles pulling profiles, fanning out messages to followers, and processing
+ stats, among others). You'll generally see Stator load climb roughly in
+ relation to the sum of the number of followers each user in your instance has;
+ a "celebrity" or other popular account will give Stator a lot of work as it
+ has to send a copy of each of their posts to every follower, separately.
+
+As you scale up the number of containers, keep the PostgreSQL connection limit
+in mind; this is generally the first thing that will fail, as Stator workers in
+particular are quite connection-hungry (the parallel nature of their internal
+processing means they might be working on 50 different objects at once). It's
+generally a good idea to set it as high as your PostgreSQL server will take
+(consult PostgreSQL tuning guides for the effect changing that setting has
+on memory usage, specifically).
+
+If you end up having a large server that is running into database performance
+problems, please get in touch with us and discuss it; Takahē is young enough
+that we need data and insight from those installations to help optimise it more.
+
+
Federating
----------
@@ -17,22 +50,115 @@ Environment Variable:
Caching
---------
+-------
By default Takahē has caching disabled. The caching needs of a server can
vary drastically based upon the number of users and how interconnected
they are with other servers.
-Caching is configured by specifying a cache DSN in the environment variable
-``TAKAHE_CACHES_DEFAULT``. The DSN format can be any supported by
+There are multiple ways Takahē uses caches:
+
+* For caching rendered pages and responses, like user profile information.
+ These caches reduce database load on your server and improve performance.
+
+* For proxying and caching remote user images and post images. These must be
+ proxied to protect your users' privacy; also caching these reduces
+ your server's consumed bandwidth and improves users' loading times.
+
+The exact caches you can configure are:
+
+* ``TAKAHE_CACHES_DEFAULT``: Rendered page and response caching
+
+* ``TAKAHE_CACHES_MEDIA``: Remote post images and user profile header pictures
+
+* ``TAKAHE_CACHES_AVATARS``: Remote user avatars ("icons") only
+
+We recommend you set up ``TAKAHE_CACHES_MEDIA`` and ``TAKAHE_CACHES_AVATARS``
+at a bare minimum - proxying these all the time without caching will eat into
+your server's bandwidth.
+
+All caches are configured the same way - with a custom cache URI/URL. We
+support anything that is available as part of
`django-cache-url <https://github.com/epicserve/django-cache-url>`_, but
some cache backends will require additional Python packages not installed
-by default with Takahē.
+by default with Takahē. More discussion of the backends is below.
+
+All items in the cache come with an expiry set - usually one week - but you
+can also configure a maximum cache size on dedicated cache datastores like
+Memcache. The key names used by the caches do not overlap, so there is
+no need to configure different key prefixes for each of Takahē's caches.
+
+
+Backends
+~~~~~~~~
+
+Redis
+#####
+
+Examples::
+ redis://redis:6379/0
+ redis://user:password@redis:6379/0
+ rediss://user:password@redis:6379/0
+
+A Redis-protocol server. Use ``redis://`` for unencrypted communication and
+``rediss://`` for TLS.
+
+Redis has a large item size limit and is suitable for all caches. We recommend
+that you keep the DEFAULT cache separate from the MEDIA and AVATARS caches, and
+set the ``maxmemory`` on both to appropriate values (the proxying caches will
+need more memory than the DEFAULT cache).
+
+
+
+Memcache
+########
+
+Examples::
+ memcached://memcache:11211?key_prefix=takahe
+ memcached://server1:11211,server2:11211
+
+A remote Memcache-protocol server (or set of servers).
+
+Memcached has a 1MB limit per item by default, so this is only suitable for the
+DEFAULT cache and not the AVATARS or MEDIA cache.
+
+
+Filesystem
+##########
+
+Examples::
+ file:///var/cache/takahe/
+
+A cache on the local disk.
+
+This *will* work with any of the caches, but is probably more suitable
+for MEDIA and AVATARS.
+
+Note that if you are running Takahē in a cluster, this cache will not be shared
+across different machines. This is not quite as bad as it first seems; it just
+means you will have more potential uncached requests until all machines have
+a cached copy.
+
+
+Local Memory
+############
+
+Examples::
+ locmem://default
+
+A local memory cache, inside the Python process. This will consume additional
+memory for the process, and should not be used with the MEDIA or AVATARS caches.
+
+
+CDNs
+----
+
+You can use Takahē with a "read through" CDN that takes over your site's main
+domain serving and passes some requests through to Takahē as a backend.
-**Examples**
+Takahē sets the appropriate ``Vary`` headers to ensure that cache leakage does
+not happen, and ``Last-Modified`` and ``ETag`` headers to allow the CDN to
+correctly expire cache items.
-* LocMem cache for a small server: ``locmem://default``
-* Memcache cache for a service named ``memcache`` in a docker compose file:
- ``memcached://memcache:11211?key_prefix=takahe``
-* Multiple memcache cache servers:
- ``memcached://server1:11211,server2:11211``
+Takahē does not yet support offloading local media URLs (such as profile images
+and post images) to a *separate* CDN URL; this will be coming in the future.
diff --git a/mediaproxy/__init__.py b/mediaproxy/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/mediaproxy/__init__.py
diff --git a/mediaproxy/apps.py b/mediaproxy/apps.py
new file mode 100644
index 0000000..6b87719
--- /dev/null
+++ b/mediaproxy/apps.py
@@ -0,0 +1,6 @@
+from django.apps import AppConfig
+
+
+class MediaproxyConfig(AppConfig):
+    """
+    Django app configuration for the ``mediaproxy`` app.
+    """
+
+    default_auto_field = "django.db.models.BigAutoField"
+    name = "mediaproxy"
diff --git a/mediaproxy/views.py b/mediaproxy/views.py
new file mode 100644
index 0000000..e799e8b
--- /dev/null
+++ b/mediaproxy/views.py
@@ -0,0 +1,101 @@
+import httpx
+from django.conf import settings
+from django.core.cache import caches
+from django.http import Http404, HttpResponse
+from django.shortcuts import get_object_or_404
+from django.views.generic import View
+
+from activities.models import PostAttachment
+from users.models import Identity
+
+
+class BaseCacheView(View):
+ """
+ Base class for caching remote content.
+ """
+
+ cache_name = "media"
+ item_timeout: int | None = None
+
+ def get(self, request, **kwargs):
+ self.kwargs = kwargs
+ remote_url = self.get_remote_url()
+ cache = caches[self.cache_name]
+ cache_key = "proxy_" + remote_url
+ # See if it's already cached
+ cached_content = cache.get(cache_key)
+ if not cached_content:
+ # OK, fetch and cache it
+ try:
+ remote_response = httpx.get(
+ remote_url,
+ headers={"User-Agent": settings.TAKAHE_USER_AGENT},
+ follow_redirects=True,
+ timeout=settings.SETUP.REMOTE_TIMEOUT,
+ )
+ except (httpx.ConnectError, httpx.RequestError):
+ return HttpResponse(status=502)
+ if remote_response.status_code >= 400:
+ return HttpResponse(status=502)
+ # We got it - shove it into the cache
+ cached_content = {
+ "content": remote_response.content,
+ "mimetype": remote_response.headers.get(
+ "Content-Type", "application/octet-stream"
+ ),
+ }
+ cache.set(cache_key, cached_content, timeout=self.item_timeout)
+ return HttpResponse(
+ cached_content["content"],
+ headers={
+ "Content-Type": cached_content["mimetype"],
+ },
+ )
+
+ def get_remote_url(self):
+ raise NotImplementedError()
+
+
+class IdentityIconCacheView(BaseCacheView):
+ """
+ Caches identity icons (avatars)
+ """
+
+ cache_name = "avatars"
+ item_timeout = 86400 * 7 # One week
+
+ def get_remote_url(self):
+ self.identity = get_object_or_404(Identity, pk=self.kwargs["identity_id"])
+ if self.identity.local or not self.identity.image_uri:
+ raise Http404()
+ return self.identity.icon_uri
+
+
+class IdentityImageCacheView(BaseCacheView):
+ """
+ Caches identity profile header images
+ """
+
+ item_timeout = 86400 * 7 # One week
+
+ def get_remote_url(self):
+ self.identity = get_object_or_404(Identity, pk=self.kwargs["identity_id"])
+ if self.identity.local or not self.identity.image_uri:
+ raise Http404()
+ return self.identity.image_uri
+
+
+class PostAttachmentCacheView(BaseCacheView):
+ """
+ Caches post media (images only, videos should always be offloaded to remote)
+ """
+
+ item_timeout = 86400 * 7 # One week
+
+ def get_remote_url(self):
+ self.post_attachment = get_object_or_404(
+ PostAttachment, pk=self.kwargs["attachment_id"]
+ )
+ if not self.post_attachment.is_image():
+ raise Http404()
+ return self.post_attachment.remote_url
diff --git a/takahe/settings.py b/takahe/settings.py
index f508952..64a523a 100644
--- a/takahe/settings.py
+++ b/takahe/settings.py
@@ -118,6 +118,12 @@ class Settings(BaseSettings):
#: Default cache backend
CACHES_DEFAULT: CacheBackendUrl | None = None
+ #: User icon (avatar) caching backend
+ CACHES_AVATARS: CacheBackendUrl | None = None
+
+ #: Media caching backend
+ CACHES_MEDIA: CacheBackendUrl | None = None
+
PGHOST: str | None = None
PGPORT: int | None = 5432
PGNAME: str = "takahe"
@@ -167,6 +173,7 @@ INSTALLED_APPS = [
"activities",
"users",
"stator",
+ "mediaproxy",
]
MIDDLEWARE = [
@@ -351,7 +358,11 @@ if SETUP.MEDIA_BACKEND:
else:
raise ValueError(f"Unsupported media backend {parsed.scheme}")
-CACHES = {"default": django_cache_url.parse(SETUP.CACHES_DEFAULT or "dummy://")}
+CACHES = {
+ "default": django_cache_url.parse(SETUP.CACHES_DEFAULT or "dummy://"),
+ "avatars": django_cache_url.parse(SETUP.CACHES_AVATARS or "dummy://"),
+ "media": django_cache_url.parse(SETUP.CACHES_MEDIA or "dummy://"),
+}
if SETUP.ERROR_EMAILS:
ADMINS = [("Admin", e) for e in SETUP.ERROR_EMAILS]
diff --git a/takahe/urls.py b/takahe/urls.py
index 66f176d..98d1cd5 100644
--- a/takahe/urls.py
+++ b/takahe/urls.py
@@ -5,6 +5,7 @@ from django.views.static import serve
from activities.views import compose, explore, follows, posts, search, timelines
from core import views as core
+from mediaproxy import views as mediaproxy
from stator import views as stator
from users.views import activitypub, admin, auth, identity, settings
@@ -176,6 +177,22 @@ urlpatterns = [
core.FlatPage.as_view(title="Server Rules", config_option="policy_rules"),
name="rules",
),
+ # Media/image proxy
+ path(
+ "proxy/identity_icon/<identity_id>/",
+ mediaproxy.IdentityIconCacheView.as_view(),
+ name="proxy_identity_icon",
+ ),
+ path(
+ "proxy/identity_image/<identity_id>/",
+ mediaproxy.IdentityImageCacheView.as_view(),
+ name="proxy_identity_image",
+ ),
+ path(
+ "proxy/post_attachment/<attachment_id>/",
+ mediaproxy.PostAttachmentCacheView.as_view(),
+ name="proxy_post_attachment",
+ ),
# Well-known endpoints and system actor
path(".well-known/webfinger", activitypub.Webfinger.as_view()),
path(".well-known/host-meta", activitypub.HostMeta.as_view()),
diff --git a/users/models/identity.py b/users/models/identity.py
index c674bf4..21ac0fd 100644
--- a/users/models/identity.py
+++ b/users/models/identity.py
@@ -153,7 +153,7 @@ class Identity(StatorModel):
if self.icon:
return self.icon.url
elif self.icon_uri:
- return self.icon_uri
+ return f"/proxy/identity_icon/{self.pk}/"
else:
return static("img/unknown-icon-128.png")
@@ -164,7 +164,7 @@ class Identity(StatorModel):
if self.image:
return self.image.url
elif self.image_uri:
- return self.image_uri
+ return f"/proxy/identity_image/{self.pk}/"
@property
def safe_summary(self):
diff --git a/users/views/identity.py b/users/views/identity.py
index 4ca230a..fba640c 100644
--- a/users/views/identity.py
+++ b/users/views/identity.py
@@ -7,6 +7,7 @@ from django.core import validators
from django.http import Http404, JsonResponse
from django.shortcuts import redirect
from django.utils.decorators import method_decorator
+from django.views.decorators.vary import vary_on_headers
from django.views.generic import FormView, ListView, TemplateView, View
from activities.models import Post, PostInteraction
@@ -18,6 +19,7 @@ from users.models import Domain, Follow, FollowStates, Identity, IdentityStates
from users.shortcuts import by_handle_or_404
+@method_decorator(vary_on_headers("Accept"), name="dispatch")
@method_decorator(cache_page_by_ap_json(public_only=True), name="dispatch")
class ViewIdentity(ListView):
"""