Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support MSC3916 by adding unstable media endpoints to _matrix/client #17213

Merged
merged 8 commits into from
May 24, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/17213.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Support MSC3916 by adding unstable media endpoints to `_matrix/client` (#17213).
4 changes: 4 additions & 0 deletions synapse/config/experimental.py
Original file line number Diff line number Diff line change
@@ -436,3 +436,7 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:
self.msc4115_membership_on_events = experimental.get(
"msc4115_membership_on_events", False
)

self.msc3916_authenticated_media_enabled = experimental.get(
"msc3916_authenticated_media_enabled", False
)
486 changes: 485 additions & 1 deletion synapse/media/thumbnailer.py

Large diffs are not rendered by default.

205 changes: 205 additions & 0 deletions synapse/rest/client/media.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2020 The Matrix.org Foundation C.I.C.
# Copyright 2015, 2016 OpenMarket Ltd
# Copyright (C) 2024 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
#

import logging
import re

from synapse.http.server import (
HttpServer,
respond_with_json,
respond_with_json_bytes,
set_corp_headers,
set_cors_headers,
)
from synapse.http.servlet import RestServlet, parse_integer, parse_string
from synapse.http.site import SynapseRequest
from synapse.media._base import (
DEFAULT_MAX_TIMEOUT_MS,
MAXIMUM_ALLOWED_MAX_TIMEOUT_MS,
respond_404,
)
from synapse.media.media_repository import MediaRepository
from synapse.media.media_storage import MediaStorage
from synapse.media.thumbnailer import ThumbnailProvider
from synapse.server import HomeServer
from synapse.util.stringutils import parse_and_validate_server_name

logger = logging.getLogger(__name__)


class UnstablePreviewURLServlet(RestServlet):
"""
Same as `GET /_matrix/media/r0/preview_url`, this endpoint provides a generic preview API
for URLs which outputs Open Graph (https://ogp.me/) responses (with some Matrix
specific additions).
This does have trade-offs compared to other designs:
* Pros:
* Simple and flexible; can be used by any clients at any point
* Cons:
* If each homeserver provides one of these independently, all the homeservers in a
room may needlessly DoS the target URI
* The URL metadata must be stored somewhere, rather than just using Matrix
itself to store the media.
* Matrix cannot be used to distribute the metadata between homeservers.
"""

PATTERNS = [
re.compile(r"^/_matrix/client/unstable/org.matrix.msc3916/media/preview_url$")
]

def __init__(
self,
hs: "HomeServer",
media_repo: "MediaRepository",
media_storage: MediaStorage,
):
super().__init__()

self.auth = hs.get_auth()
self.clock = hs.get_clock()
self.media_repo = media_repo
self.media_storage = media_storage
assert self.media_repo.url_previewer is not None
self.url_previewer = self.media_repo.url_previewer

async def on_GET(self, request: SynapseRequest) -> None:
requester = await self.auth.get_user_by_req(request)
url = parse_string(request, "url", required=True)
ts = parse_integer(request, "ts")
if ts is None:
ts = self.clock.time_msec()

og = await self.url_previewer.preview(url, requester.user, ts)
respond_with_json_bytes(request, 200, og, send_cors=True)


class UnstableMediaConfigResource(RestServlet):
PATTERNS = [
re.compile(r"^/_matrix/client/unstable/org.matrix.msc3916/media/config$")
]

def __init__(self, hs: "HomeServer"):
super().__init__()
config = hs.config
self.clock = hs.get_clock()
self.auth = hs.get_auth()
self.limits_dict = {"m.upload.size": config.media.max_upload_size}

async def on_GET(self, request: SynapseRequest) -> None:
await self.auth.get_user_by_req(request)
respond_with_json(request, 200, self.limits_dict, send_cors=True)


class UnstableThumbnailResource(RestServlet):
PATTERNS = [
re.compile(
"/_matrix/client/unstable/org.matrix.msc3916/media/thumbnail/(?P<server_name>[^/]*)/(?P<media_id>[^/]*)$"
)
]

def __init__(
self,
hs: "HomeServer",
media_repo: "MediaRepository",
media_storage: MediaStorage,
):
super().__init__()

self.store = hs.get_datastores().main
self.media_repo = media_repo
self.media_storage = media_storage
self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails
self._is_mine_server_name = hs.is_mine_server_name
self._server_name = hs.hostname
self.prevent_media_downloads_from = hs.config.media.prevent_media_downloads_from
self.thumbnailer = ThumbnailProvider(hs, media_repo, media_storage)
self.auth = hs.get_auth()

async def on_GET(
self, request: SynapseRequest, server_name: str, media_id: str
) -> None:
# Validate the server name, raising if invalid
parse_and_validate_server_name(server_name)
await self.auth.get_user_by_req(request)

set_cors_headers(request)
set_corp_headers(request)
width = parse_integer(request, "width", required=True)
height = parse_integer(request, "height", required=True)
method = parse_string(request, "method", "scale")
# TODO Parse the Accept header to get an prioritised list of thumbnail types.
m_type = "image/png"
max_timeout_ms = parse_integer(
request, "timeout_ms", default=DEFAULT_MAX_TIMEOUT_MS
)
max_timeout_ms = min(max_timeout_ms, MAXIMUM_ALLOWED_MAX_TIMEOUT_MS)

if self._is_mine_server_name(server_name):
if self.dynamic_thumbnails:
await self.thumbnailer.select_or_generate_local_thumbnail(
request, media_id, width, height, method, m_type, max_timeout_ms
)
else:
await self.thumbnailer.respond_local_thumbnail(
request, media_id, width, height, method, m_type, max_timeout_ms
)
self.media_repo.mark_recently_accessed(None, media_id)
else:
# Don't let users download media from configured domains, even if it
# is already downloaded. This is Trust & Safety tooling to make some
# media inaccessible to local users.
# See `prevent_media_downloads_from` config docs for more info.
if server_name in self.prevent_media_downloads_from:
respond_404(request)
return

remote_resp_function = (
self.thumbnailer.select_or_generate_remote_thumbnail
if self.dynamic_thumbnails
else self.thumbnailer.respond_remote_thumbnail
)
await remote_resp_function(
request,
server_name,
media_id,
width,
height,
method,
m_type,
max_timeout_ms,
)
self.media_repo.mark_recently_accessed(server_name, media_id)


def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
if hs.config.experimental.msc3916_authenticated_media_enabled:
media_repo = hs.get_media_repository()
if hs.config.media.url_preview_enabled:
UnstablePreviewURLServlet(
hs, media_repo, media_repo.media_storage
).register(http_server)
UnstableMediaConfigResource(hs).register(http_server)
UnstableThumbnailResource(hs, media_repo, media_repo.media_storage).register(
http_server
)
476 changes: 9 additions & 467 deletions synapse/rest/media/thumbnail_resource.py

Large diffs are not rendered by default.

161 changes: 80 additions & 81 deletions tests/media/test_media_storage.py
Original file line number Diff line number Diff line change
@@ -18,6 +18,7 @@
# [This file includes modifications made by New Vector Limited]
#
#
import itertools
import os
import shutil
import tempfile
@@ -46,11 +47,11 @@
from synapse.media.filepath import MediaFilePaths
from synapse.media.media_storage import MediaStorage, ReadableFileWrapper
Copy link
Contributor Author

@H-Shay H-Shay May 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since these tests won't be moved when the old endpoints are deprecated I changed the urls in these tests to test the new URLs, I was thinking that we could drop these changes before merge as it is mostly just a proof that the new endpoints work the same as the old endpoints - or would it make sense to copy these tests and have a set for the new URLs and the old URLs (which I did below)?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can use @parameterized_class to run the tests twice with different URLs?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh that makes sense thanks! It's been fixed

from synapse.media.storage_provider import FileStorageProviderBackend
from synapse.media.thumbnailer import ThumbnailProvider
from synapse.module_api import ModuleApi
from synapse.module_api.callbacks.spamchecker_callbacks import load_legacy_spam_checkers
from synapse.rest import admin
from synapse.rest.client import login
from synapse.rest.media.thumbnail_resource import ThumbnailResource
from synapse.rest.client import login, media
from synapse.server import HomeServer
from synapse.types import JsonDict, RoomAlias
from synapse.util import Clock
@@ -153,68 +154,54 @@ class _TestImage:
is_inline: bool = True


@parameterized_class(
("test_image",),
[
# small png
(
_TestImage(
SMALL_PNG,
b"image/png",
b".png",
unhexlify(
b"89504e470d0a1a0a0000000d4948445200000020000000200806"
b"000000737a7af40000001a49444154789cedc101010000008220"
b"ffaf6e484001000000ef0610200001194334ee0000000049454e"
b"44ae426082"
),
unhexlify(
b"89504e470d0a1a0a0000000d4948445200000001000000010806"
b"0000001f15c4890000000d49444154789c636060606000000005"
b"0001a5f645400000000049454e44ae426082"
),
),
),
# small png with transparency.
(
_TestImage(
unhexlify(
b"89504e470d0a1a0a0000000d49484452000000010000000101000"
b"00000376ef9240000000274524e5300010194fdae0000000a4944"
b"4154789c636800000082008177cd72b60000000049454e44ae426"
b"082"
),
b"image/png",
b".png",
# Note that we don't check the output since it varies across
# different versions of Pillow.
),
),
# small lossless webp
(
_TestImage(
unhexlify(
b"524946461a000000574542505650384c0d0000002f0000001007"
b"1011118888fe0700"
),
b"image/webp",
b".webp",
),
),
# an empty file
(
_TestImage(
b"",
b"image/gif",
b".gif",
expected_found=False,
unable_to_thumbnail=True,
),
),
# An SVG.
(
_TestImage(
b"""<?xml version="1.0"?>
small_png = _TestImage(
SMALL_PNG,
b"image/png",
b".png",
unhexlify(
b"89504e470d0a1a0a0000000d4948445200000020000000200806"
b"000000737a7af40000001a49444154789cedc101010000008220"
b"ffaf6e484001000000ef0610200001194334ee0000000049454e"
b"44ae426082"
),
unhexlify(
b"89504e470d0a1a0a0000000d4948445200000001000000010806"
b"0000001f15c4890000000d49444154789c636060606000000005"
b"0001a5f645400000000049454e44ae426082"
),
)

small_png_with_transparency = _TestImage(
unhexlify(
b"89504e470d0a1a0a0000000d49484452000000010000000101000"
b"00000376ef9240000000274524e5300010194fdae0000000a4944"
b"4154789c636800000082008177cd72b60000000049454e44ae426"
b"082"
),
b"image/png",
b".png",
# Note that we don't check the output since it varies across
# different versions of Pillow.
)

small_lossless_webp = _TestImage(
unhexlify(
b"524946461a000000574542505650384c0d0000002f0000001007" b"1011118888fe0700"
),
b"image/webp",
b".webp",
)

empty_file = _TestImage(
b"",
b"image/gif",
b".gif",
expected_found=False,
unable_to_thumbnail=True,
)

SVG = _TestImage(
b"""<?xml version="1.0"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
@@ -223,19 +210,32 @@ class _TestImage:
<circle cx="100" cy="100" r="50" stroke="black"
stroke-width="5" fill="red" />
</svg>""",
b"image/svg",
b".svg",
expected_found=False,
unable_to_thumbnail=True,
is_inline=False,
),
),
],
b"image/svg",
b".svg",
expected_found=False,
unable_to_thumbnail=True,
is_inline=False,
)
test_images = [
small_png,
small_png_with_transparency,
small_lossless_webp,
empty_file,
SVG,
]
urls = [
"_matrix/media/r0/thumbnail",
"_matrix/client/unstable/org.matrix.msc3916/media/thumbnail",
]


@parameterized_class(("test_image", "url"), itertools.product(test_images, urls))
class MediaRepoTests(unittest.HomeserverTestCase):
servlets = [media.register_servlets]
test_image: ClassVar[_TestImage]
hijack_auth = True
user_id = "@test:user"
url: ClassVar[str]

def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
self.fetches: List[
@@ -298,6 +298,7 @@ def write_err(f: Failure) -> Failure:
"config": {"directory": self.storage_path},
}
config["media_storage_providers"] = [provider_config]
config["experimental_features"] = {"msc3916_authenticated_media_enabled": True}

hs = self.setup_test_homeserver(config=config, federation_http_client=client)

@@ -502,7 +503,7 @@ def test_thumbnail_repeated_thumbnail(self) -> None:
params = "?width=32&height=32&method=scale"
channel = self.make_request(
"GET",
f"/_matrix/media/v3/thumbnail/{self.media_id}{params}",
f"/{self.url}/{self.media_id}{params}",
shorthand=False,
await_result=False,
)
@@ -530,7 +531,7 @@ def test_thumbnail_repeated_thumbnail(self) -> None:

channel = self.make_request(
"GET",
f"/_matrix/media/v3/thumbnail/{self.media_id}{params}",
f"/{self.url}/{self.media_id}{params}",
shorthand=False,
await_result=False,
)
@@ -566,12 +567,11 @@ def _test_thumbnail(
params = "?width=32&height=32&method=" + method
channel = self.make_request(
"GET",
f"/_matrix/media/r0/thumbnail/{self.media_id}{params}",
f"/{self.url}/{self.media_id}{params}",
shorthand=False,
await_result=False,
)
self.pump()

headers = {
b"Content-Length": [b"%d" % (len(self.test_image.data))],
b"Content-Type": [self.test_image.content_type],
@@ -580,7 +580,6 @@ def _test_thumbnail(
(self.test_image.data, (len(self.test_image.data), headers))
)
self.pump()

if expected_found:
self.assertEqual(channel.code, 200)

@@ -603,7 +602,7 @@ def _test_thumbnail(
channel.json_body,
{
"errcode": "M_UNKNOWN",
"error": "Cannot find any thumbnails for the requested media ('/_matrix/media/r0/thumbnail/example.com/12345'). This might mean the media is not a supported_media_format=(image/jpeg, image/jpg, image/webp, image/gif, image/png) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)",
"error": f"Cannot find any thumbnails for the requested media ('/{self.url}/example.com/12345'). This might mean the media is not a supported_media_format=(image/jpeg, image/jpg, image/webp, image/gif, image/png) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)",
},
)
else:
@@ -613,7 +612,7 @@ def _test_thumbnail(
channel.json_body,
{
"errcode": "M_NOT_FOUND",
"error": "Not found '/_matrix/media/r0/thumbnail/example.com/12345'",
"error": f"Not found '/{self.url}/example.com/12345'",
},
)

@@ -625,12 +624,12 @@ def test_same_quality(self, method: str, desired_size: int) -> None:

content_type = self.test_image.content_type.decode()
media_repo = self.hs.get_media_repository()
thumbnail_resouce = ThumbnailResource(
thumbnail_provider = ThumbnailProvider(
self.hs, media_repo, media_repo.media_storage
)

self.assertIsNotNone(
thumbnail_resouce._select_thumbnail(
thumbnail_provider._select_thumbnail(
desired_width=desired_size,
desired_height=desired_size,
desired_method=method,
1,609 changes: 1,609 additions & 0 deletions tests/rest/client/test_media.py

Large diffs are not rendered by default.

Loading