diff --git a/changelog.d/19275.feature b/changelog.d/19275.feature new file mode 100644 index 0000000000..5147c546cf --- /dev/null +++ b/changelog.d/19275.feature @@ -0,0 +1 @@ +Server admins can bypass the quarantine media check when downloading media by setting the `admin_unsafely_bypass_quarantine` query parameter to `true` on Client-Server API media download requests. \ No newline at end of file diff --git a/docs/admin_api/media_admin_api.md b/docs/admin_api/media_admin_api.md index a3d99cb074..25481a8c55 100644 --- a/docs/admin_api/media_admin_api.md +++ b/docs/admin_api/media_admin_api.md @@ -115,6 +115,20 @@ is quarantined, Synapse will: - Quarantine any existing cached remote media. - Quarantine any future remote media. +## Downloading quarantined media + +Normally, when media is quarantined, it will return a 404 error when downloaded. +Admins can bypass this by adding `?admin_unsafely_bypass_quarantine=true` +to the [normal download URL](https://spec.matrix.org/v1.16/client-server-api/#get_matrixclientv1mediadownloadservernamemediaid). + +Bypassing the quarantine check is not recommended. Media is typically quarantined +to prevent harmful content from being served to users, which includes admins. Only +set the bypass parameter if you intentionally want to access potentially harmful +content. + +Non-admin users cannot bypass quarantine checks, even when specifying the above +query parameter. + ## Quarantining media by ID This API quarantines a single piece of local or remote media. diff --git a/synapse/media/media_repository.py b/synapse/media/media_repository.py index e84e842300..cb745b96ad 100644 --- a/synapse/media/media_repository.py +++ b/synapse/media/media_repository.py @@ -439,7 +439,11 @@ class MediaRepository: return await self.store.get_cached_remote_media(origin, media_id) async def get_local_media_info( - self, request: SynapseRequest, media_id: str, max_timeout_ms: int + self, + request: SynapseRequest, + media_id: str, + max_timeout_ms: int, + bypass_quarantine: bool = False, ) -> LocalMedia | None: """Gets the info dictionary for given local media ID. If the media has not been uploaded yet, this function will wait up to ``max_timeout_ms`` @@ -451,6 +455,7 @@ class MediaRepository: the file_id for local content.) max_timeout_ms: the maximum number of milliseconds to wait for the media to be uploaded. + bypass_quarantine: whether to bypass quarantine checks Returns: Either the info dictionary for the given local media ID or @@ -466,7 +471,7 @@ class MediaRepository: respond_404(request) return None - if media_info.quarantined_by: + if media_info.quarantined_by and not bypass_quarantine: logger.info("Media %s is quarantined", media_id) respond_404(request) return None @@ -500,6 +505,7 @@ class MediaRepository: max_timeout_ms: int, allow_authenticated: bool = True, federation: bool = False, + bypass_quarantine: bool = False, ) -> None: """Responds to requests for local media, if exists, or returns 404. @@ -513,11 +519,14 @@ class MediaRepository: media to be uploaded. allow_authenticated: whether media marked as authenticated may be served to this request federation: whether the local media being fetched is for a federation request + bypass_quarantine: whether to bypass quarantine checks Returns: Resolves once a response has successfully been written to request """ - media_info = await self.get_local_media_info(request, media_id, max_timeout_ms) + media_info = await self.get_local_media_info( + request, media_id, max_timeout_ms, bypass_quarantine=bypass_quarantine + ) if not media_info: return @@ -561,6 +570,7 @@ class MediaRepository: ip_address: str, use_federation_endpoint: bool, allow_authenticated: bool = True, + bypass_quarantine: bool = False, ) -> None: """Respond to requests for remote media. @@ -577,6 +587,7 @@ class MediaRepository: federation `/download` endpoint allow_authenticated: whether media marked as authenticated may be served to this request + bypass_quarantine: whether to bypass quarantine checks Returns: Resolves once a response has successfully been written to request @@ -609,6 +620,7 @@ class MediaRepository: ip_address, use_federation_endpoint, allow_authenticated, + bypass_quarantine=bypass_quarantine, ) # Check if the media is cached on the client, if so return 304. We need @@ -697,6 +709,7 @@ class MediaRepository: ip_address: str, use_federation_endpoint: bool, allow_authenticated: bool, + bypass_quarantine: bool = False, ) -> tuple[Responder | None, RemoteMedia]: """Looks for media in local cache, if not there then attempt to download from remote server. @@ -712,6 +725,7 @@ class MediaRepository: ip_address: the IP address of the requester use_federation_endpoint: whether to request the remote media over the new federation /download endpoint + bypass_quarantine: whether to bypass quarantine checks Returns: A tuple of responder and the media info of the file. @@ -732,7 +746,7 @@ class MediaRepository: file_id = media_info.filesystem_id file_info = FileInfo(server_name, file_id) - if media_info.quarantined_by: + if media_info.quarantined_by and not bypass_quarantine: logger.info("Media is quarantined") raise NotFoundError() diff --git a/synapse/rest/client/media.py b/synapse/rest/client/media.py index f145b03af4..4db3b01576 100644 --- a/synapse/rest/client/media.py +++ b/synapse/rest/client/media.py @@ -23,6 +23,7 @@ import logging import re +from synapse.api.errors import Codes, cs_error from synapse.http.server import ( HttpServer, respond_with_json, @@ -235,7 +236,23 @@ class DownloadResource(RestServlet): # Validate the server name, raising if invalid parse_and_validate_server_name(server_name) - await self.auth.get_user_by_req(request, allow_guest=True) + requester = await self.auth.get_user_by_req(request, allow_guest=True) + is_admin = await self.auth.is_server_admin(requester) + bypass_quarantine = False + if parse_string(request, "admin_unsafely_bypass_quarantine") == "true": + if is_admin: + logger.info("Admin bypassing quarantine for media download") + bypass_quarantine = True + else: + respond_with_json( + request, + 400, + cs_error( + "Must be a server admin to bypass quarantine", + code=Codes.UNKNOWN, + ), + send_cors=True, + ) set_cors_headers(request) set_corp_headers(request) @@ -259,7 +276,11 @@ class DownloadResource(RestServlet): if self._is_mine_server_name(server_name): await self.media_repo.get_local_media( - request, media_id, file_name, max_timeout_ms + request, + media_id, + file_name, + max_timeout_ms, + bypass_quarantine=bypass_quarantine, ) else: ip_address = request.getClientAddress().host @@ -271,6 +292,7 @@ class DownloadResource(RestServlet): max_timeout_ms, ip_address, True, + bypass_quarantine=bypass_quarantine, ) diff --git a/tests/rest/admin/test_admin.py b/tests/rest/admin/test_admin.py index f3740a8e35..77d824dcd8 100644 --- a/tests/rest/admin/test_admin.py +++ b/tests/rest/admin/test_admin.py @@ -71,14 +71,43 @@ class QuarantineMediaTestCase(unittest.HomeserverTestCase): return resources def _ensure_quarantined( - self, admin_user_tok: str, server_and_media_id: str + self, + user_tok: str, + server_and_media_id: str, + include_bypass_param: bool = False, ) -> None: - """Ensure a piece of media is quarantined when trying to access it.""" + """Ensure a piece of media is quarantined when trying to access it. + + The include_bypass_param flag enables the presence of the + admin_unsafely_bypass_quarantine query parameter, but still expects that the + request will fail to download the media. + """ + if include_bypass_param: + query_string = "?admin_unsafely_bypass_quarantine=true" + channel = self.make_request( + "GET", + f"/_matrix/client/v1/media/download/{server_and_media_id}{query_string}", + shorthand=False, + access_token=user_tok, + ) + + # Non-admins can't bypass, so this should fail regardless of whether the + # media is actually quarantined. + self.assertEqual( + 400, + channel.code, + msg=( + "Expected to receive a 400 when bypassing quarantined media: %s" + % server_and_media_id + ), + ) + + # Repeat the request, this time without the bypass parameter. channel = self.make_request( "GET", f"/_matrix/client/v1/media/download/{server_and_media_id}", shorthand=False, - access_token=admin_user_tok, + access_token=user_tok, ) # Should be quarantined @@ -91,6 +120,62 @@ class QuarantineMediaTestCase(unittest.HomeserverTestCase): ), ) + def test_admin_can_bypass_quarantine(self) -> None: + self.register_user("admin", "pass", admin=True) + admin_user_tok = self.login("admin", "pass") + + # Upload some media + response = self.helper.upload_media(SMALL_PNG, tok=admin_user_tok) + + # Extract media ID from the response + server_name_and_media_id = response["content_uri"][6:] # Cut off 'mxc://' + server_name, media_id = server_name_and_media_id.split("/") + + # Attempt to access the media + channel = self.make_request( + "GET", + f"/_matrix/client/v1/media/download/{server_name_and_media_id}", + shorthand=False, + access_token=admin_user_tok, + ) + + # Should be successful + self.assertEqual(200, channel.code) + + # Quarantine the media + url = "/_synapse/admin/v1/media/quarantine/%s/%s" % ( + urllib.parse.quote(server_name), + urllib.parse.quote(media_id), + ) + channel = self.make_request( + "POST", + url, + access_token=admin_user_tok, + ) + self.pump(1.0) + self.assertEqual(200, channel.code, msg=channel.json_body) + + # Now access it *without* the bypass parameter - this should fail (as expected). + self._ensure_quarantined( + admin_user_tok, server_name_and_media_id, include_bypass_param=False + ) + + # Now access it *with* the bypass parameter - this should work + channel = self.make_request( + "GET", + f"/_matrix/client/v1/media/download/{server_name_and_media_id}?admin_unsafely_bypass_quarantine=true", + shorthand=False, + access_token=admin_user_tok, + ) + self.assertEqual( + 200, + channel.code, + msg=( + "Expected to receive a 200 on accessing (with bypass) quarantined media: %s" + % server_name_and_media_id + ), + ) + @parameterized.expand( [ # Attempt quarantine media APIs as non-admin @@ -154,8 +239,14 @@ class QuarantineMediaTestCase(unittest.HomeserverTestCase): self.pump(1.0) self.assertEqual(200, channel.code, msg=channel.json_body) - # Attempt to access the media - self._ensure_quarantined(admin_user_tok, server_name_and_media_id) + # Attempt to access the media (and ensure non-admins can't download it, even + # with a bypass parameter). Admins cannot download it without the bypass param. + self._ensure_quarantined( + non_admin_user_tok, server_name_and_media_id, include_bypass_param=True + ) + self._ensure_quarantined( + admin_user_tok, server_name_and_media_id, include_bypass_param=False + ) @parameterized.expand( [ @@ -214,9 +305,21 @@ class QuarantineMediaTestCase(unittest.HomeserverTestCase): server_and_media_id_1 = mxc_1[6:] server_and_media_id_2 = mxc_2[6:] - # Test that we cannot download any of the media anymore - self._ensure_quarantined(admin_user_tok, server_and_media_id_1) - self._ensure_quarantined(admin_user_tok, server_and_media_id_2) + # Test that we cannot download any of the media anymore, especially with the + # bypass parameter set. Admins cannot download the media without supplying the + # bypass parameter, so we check that too. + self._ensure_quarantined( + non_admin_user_tok, server_and_media_id_1, include_bypass_param=True + ) + self._ensure_quarantined( + non_admin_user_tok, server_and_media_id_2, include_bypass_param=True + ) + self._ensure_quarantined( + admin_user_tok, server_and_media_id_1, include_bypass_param=False + ) + self._ensure_quarantined( + admin_user_tok, server_and_media_id_2, include_bypass_param=False + ) def test_quarantine_all_media_by_user(self) -> None: self.register_user("user_admin", "pass", admin=True) @@ -263,10 +366,27 @@ class QuarantineMediaTestCase(unittest.HomeserverTestCase): channel.json_body, {"num_quarantined": 3}, "Expected 3 quarantined items" ) - # Attempt to access each piece of media - self._ensure_quarantined(admin_user_tok, server_and_media_id_1) - self._ensure_quarantined(admin_user_tok, server_and_media_id_2) - self._ensure_quarantined(admin_user_tok, server_and_media_id_3) + # Attempt to access each piece of media, ensuring that it can't be downloaded + # even with a bypass parameter. Admins should not be able to download the media + # either when not supplying the bypass parameter, so we check that too. + self._ensure_quarantined( + non_admin_user_tok, server_and_media_id_1, include_bypass_param=True + ) + self._ensure_quarantined( + non_admin_user_tok, server_and_media_id_2, include_bypass_param=True + ) + self._ensure_quarantined( + non_admin_user_tok, server_and_media_id_3, include_bypass_param=True + ) + self._ensure_quarantined( + admin_user_tok, server_and_media_id_1, include_bypass_param=False + ) + self._ensure_quarantined( + admin_user_tok, server_and_media_id_2, include_bypass_param=False + ) + self._ensure_quarantined( + admin_user_tok, server_and_media_id_3, include_bypass_param=False + ) def test_cannot_quarantine_safe_media(self) -> None: self.register_user("user_admin", "pass", admin=True) @@ -307,8 +427,14 @@ class QuarantineMediaTestCase(unittest.HomeserverTestCase): ) # Attempt to access each piece of media, the first should fail, the - # second should succeed. - self._ensure_quarantined(admin_user_tok, server_and_media_id_1) + # second should succeed. We check both the non-admin user with a bypass + # parameter, and the admin user without. + self._ensure_quarantined( + non_admin_user_tok, server_and_media_id_1, include_bypass_param=True + ) + self._ensure_quarantined( + admin_user_tok, server_and_media_id_1, include_bypass_param=False + ) # Attempt to access each piece of media channel = self.make_request(