diff --git a/changelog.d/19231.bugfix b/changelog.d/19231.bugfix new file mode 100644 index 0000000000..580b642bb2 --- /dev/null +++ b/changelog.d/19231.bugfix @@ -0,0 +1 @@ +Fix a bug where Mastodon posts (and possibly other embeds) have the wrong description for URL previews. diff --git a/changelog.d/19279.feature b/changelog.d/19279.feature new file mode 100644 index 0000000000..031e48dceb --- /dev/null +++ b/changelog.d/19279.feature @@ -0,0 +1 @@ +Implemented pagination for the [MSC2666](https://github.com/matrix-org/matrix-spec-proposals/pull/2666) mutual rooms endpoint. Contributed by @tulir @ Beeper. diff --git a/changelog.d/19281.feature b/changelog.d/19281.feature new file mode 100644 index 0000000000..78d3002d90 --- /dev/null +++ b/changelog.d/19281.feature @@ -0,0 +1 @@ +Admin API: add worker support to `GET /_synapse/admin/v2/users/<user_id>`. diff --git a/changelog.d/19300.feature b/changelog.d/19300.feature new file mode 100644 index 0000000000..97e43e9b28 --- /dev/null +++ b/changelog.d/19300.feature @@ -0,0 +1 @@ +Improve proxy support for the `federation_client.py` dev script. Contributed by Denis Kasak (@dkasak). diff --git a/docs/workers.md b/docs/workers.md index 2bc8afa74f..c2aef33e16 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -255,6 +255,8 @@ information. ^/_matrix/client/(api/v1|r0|v3|unstable)/directory/room/.*$ ^/_matrix/client/(r0|v3|unstable)/capabilities$ ^/_matrix/client/(r0|v3|unstable)/notifications$ + + # Admin API requests ^/_synapse/admin/v1/rooms/[^/]+$ # Encryption requests @@ -300,6 +302,9 @@ Additionally, the following REST endpoints can be handled for GET requests: # Presence requests ^/_matrix/client/(api/v1|r0|v3|unstable)/presence/ + # Admin API requests + ^/_synapse/admin/v2/users/[^/]+$ + Pagination requests can also be handled, but all requests for a given room must be routed to the same instance. 
Additionally, care must be taken to ensure that the purge history admin API is not used while pagination requests diff --git a/scripts-dev/federation_client.py b/scripts-dev/federation_client.py index 0fefc23b22..cb14f357cb 100755 --- a/scripts-dev/federation_client.py +++ b/scripts-dev/federation_client.py @@ -145,7 +145,7 @@ def request( print("Requesting %s" % dest, file=sys.stderr) s = requests.Session() - s.mount("matrix-federation://", MatrixConnectionAdapter()) + s.mount("matrix-federation://", MatrixConnectionAdapter(verify_tls=verify_tls)) headers: dict[str, str] = { "Authorization": authorization_headers[0], @@ -267,6 +267,17 @@ def read_args_from_config(args: argparse.Namespace) -> None: class MatrixConnectionAdapter(HTTPAdapter): + """ + A Matrix federation-aware HTTP Adapter. + """ + + verify_tls: bool + """whether to verify the remote server's TLS certificate.""" + + def __init__(self, verify_tls: bool = True) -> None: + self.verify_tls = verify_tls + super().__init__() + def send( self, request: PreparedRequest, @@ -280,7 +291,7 @@ class MatrixConnectionAdapter(HTTPAdapter): assert isinstance(request.url, str) parsed = urlparse.urlsplit(request.url) server_name = parsed.netloc - well_known = self._get_well_known(parsed.netloc) + well_known = self._get_well_known(parsed.netloc, verify_tls=self.verify_tls) if well_known: server_name = well_known @@ -318,6 +329,21 @@ class MatrixConnectionAdapter(HTTPAdapter): print( f"Connecting to {host}:{port} with SNI {ssl_server_name}", file=sys.stderr ) + + if proxies: + scheme = parsed.scheme + if isinstance(scheme, bytes): + scheme = scheme.decode("utf-8") + + proxy_for_scheme = proxies.get(scheme) + if proxy_for_scheme: + return self.proxy_manager_for(proxy_for_scheme).connection_from_host( + host, + port=port, + scheme="https", + pool_kwargs={"server_hostname": ssl_server_name}, + ) + return self.poolmanager.connection_from_host( host, port=port, @@ -368,7 +394,7 @@ class MatrixConnectionAdapter(HTTPAdapter): 
return server_name, 8448, server_name @staticmethod - def _get_well_known(server_name: str) -> str | None: + def _get_well_known(server_name: str, verify_tls: bool = True) -> str | None: if ":" in server_name: # explicit port, or ipv6 literal. Either way, no .well-known return None @@ -379,7 +405,7 @@ class MatrixConnectionAdapter(HTTPAdapter): print(f"fetching {uri}", file=sys.stderr) try: - resp = requests.get(uri) + resp = requests.get(uri, verify=verify_tls) if resp.status_code != 200: print("%s gave %i" % (uri, resp.status_code), file=sys.stderr) return None diff --git a/synapse/media/url_previewer.py b/synapse/media/url_previewer.py index 2c5e518918..7782905a7a 100644 --- a/synapse/media/url_previewer.py +++ b/synapse/media/url_previewer.py @@ -331,10 +331,16 @@ class UrlPreviewer: # response failed or is incomplete. og_from_html = parse_html_to_open_graph(tree) - # Compile the Open Graph response by using the scraped - # information from the HTML and overlaying any information - # from the oEmbed response. - og = {**og_from_html, **og_from_oembed} + # Compile an Open Graph response by combining the oEmbed response + # and the information from the HTML, with information in the HTML + # preferred. + # + # The ordering here is intentional: certain websites (especially + # SPA JavaScript-based ones) including Mastodon and YouTube provide + # almost complete OpenGraph descriptions but only stubs for oEmbed, + # with further oEmbed information being populated with JavaScript, + # that Synapse won't execute. 
+ og = og_from_oembed | og_from_html await self._precache_image_url(user, media_info, og) else: diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index fe3eeafd9f..b209404cd1 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -119,6 +119,7 @@ from synapse.rest.admin.users import ( UserRegisterServlet, UserReplaceMasterCrossSigningKeyRestServlet, UserRestServletV2, + UserRestServletV2Get, UsersRestServletV2, UsersRestServletV3, UserTokenRestServlet, @@ -281,6 +282,8 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: # matrix_authentication_service integration uses the dedicated MAS API. if hs.config.experimental.msc3861.enabled: register_servlets_for_msc3861_delegation(hs, http_server) + else: + UserRestServletV2Get(hs).register(http_server) return diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index 406ad8f406..ccd34d17d8 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -210,7 +210,7 @@ class UsersRestServletV3(UsersRestServletV2): return parse_boolean(request, "deactivated") -class UserRestServletV2(RestServlet): +class UserRestServletV2Get(RestServlet): PATTERNS = admin_patterns("/users/(?P<user_id>[^/]*)$", "v2") """Get request to list user details. @@ -220,22 +220,6 @@ class UserRestServletV2(RestServlet): returns: 200 OK with user details if success otherwise an error. - - Put request to allow an administrator to add or modify a user. - This needs user to have administrator access in Synapse. - We use PUT instead of POST since we already know the id of the user - object to create. POST could be used to create guests. - - PUT /_synapse/admin/v2/users/<user_id> - { - "password": "secret", - "displayname": "User" - } - - returns: - 201 OK with new user object if user was created or - 200 OK with modified user object if user was modified - otherwise an error. 
""" def __init__(self, hs: "HomeServer"): @@ -267,6 +251,28 @@ class UserRestServletV2(RestServlet): return HTTPStatus.OK, user_info_dict + +class UserRestServletV2(UserRestServletV2Get): + """ + Put request to allow an administrator to add or modify a user. + This needs user to have administrator access in Synapse. + We use PUT instead of POST since we already know the id of the user + object to create. POST could be used to create guests. + + Note: This inherits from `UserRestServletV2Get`, so also supports the `GET` route. + + PUT /_synapse/admin/v2/users/<user_id> + { + "password": "secret", + "displayname": "User" + } + + returns: + 201 OK with new user object if user was created or + 200 OK with modified user object if user was modified + otherwise an error. + """ + async def on_PUT( self, request: SynapseRequest, user_id: str ) -> tuple[int, JsonMapping]: diff --git a/synapse/rest/client/mutual_rooms.py b/synapse/rest/client/mutual_rooms.py index 3e5316c4b7..a6a913db34 100644 --- a/synapse/rest/client/mutual_rooms.py +++ b/synapse/rest/client/mutual_rooms.py @@ -19,9 +19,12 @@ # # import logging +from bisect import bisect from http import HTTPStatus from typing import TYPE_CHECKING + +from unpaddedbase64 import decode_base64, encode_base64 + from synapse.api.errors import Codes, SynapseError from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_strings_from_args @@ -35,10 +38,34 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) +MUTUAL_ROOMS_BATCH_LIMIT = 100 + + +def _parse_mutual_rooms_batch_token_args(args: dict[bytes, list[bytes]]) -> str | None: + from_batches = parse_strings_from_args(args, "from") + if not from_batches: + return None + if len(from_batches) > 1: + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Duplicate from query parameter", + errcode=Codes.INVALID_PARAM, + ) + if from_batches[0]: + try: + return decode_base64(from_batches[0]).decode("utf-8") + except Exception: + raise SynapseError( 
HTTPStatus.BAD_REQUEST, + "Malformed from token", + errcode=Codes.INVALID_PARAM, + ) + return None + class UserMutualRoomsServlet(RestServlet): """ - GET /uk.half-shot.msc2666/user/mutual_rooms?user_id={user_id} HTTP/1.1 + GET /uk.half-shot.msc2666/user/mutual_rooms?user_id={user_id}&from={token} HTTP/1.1 """ PATTERNS = client_patterns( @@ -56,6 +83,7 @@ class UserMutualRoomsServlet(RestServlet): args: dict[bytes, list[bytes]] = request.args # type: ignore user_ids = parse_strings_from_args(args, "user_id", required=True) + from_batch = _parse_mutual_rooms_batch_token_args(args) if len(user_ids) > 1: raise SynapseError( @@ -64,29 +92,52 @@ class UserMutualRoomsServlet(RestServlet): errcode=Codes.INVALID_PARAM, ) - # We don't do batching, so a batch token is illegal by default - if b"batch_token" in args: - raise SynapseError( - HTTPStatus.BAD_REQUEST, - "Unknown batch_token", - errcode=Codes.INVALID_PARAM, - ) - user_id = user_ids[0] requester = await self.auth.get_user_by_req(request) if user_id == requester.user.to_string(): raise SynapseError( - HTTPStatus.UNPROCESSABLE_ENTITY, + HTTPStatus.BAD_REQUEST, "You cannot request a list of shared rooms with yourself", - errcode=Codes.INVALID_PARAM, + errcode=Codes.UNKNOWN, ) - rooms = await self.store.get_mutual_rooms_between_users( - frozenset((requester.user.to_string(), user_id)) + # Sort here instead of the database function, so that we don't expose + # clients to any unrelated changes to the sorting algorithm. + rooms = sorted( + await self.store.get_mutual_rooms_between_users( + frozenset((requester.user.to_string(), user_id)) + ) ) - return 200, {"joined": list(rooms)} + if from_batch: + # A from_batch token was provided, so cut off any rooms where the ID is + # lower than or equal to the token. This method doesn't care whether the + # provided token room still exists, nor whether it's even a real room ID. 
+ # + # However, if rooms with a lower ID are added after the token was issued, + # they will not be included until the client makes a new request without a + # from token. This is considered acceptable, as clients generally won't + # persist these results for long periods. + rooms = rooms[bisect(rooms, from_batch) :] + + if len(rooms) <= MUTUAL_ROOMS_BATCH_LIMIT: + # We've reached the end of the list, don't return a batch token + return 200, {"joined": rooms} + + rooms = rooms[:MUTUAL_ROOMS_BATCH_LIMIT] + # We use urlsafe unpadded base64 encoding for the batch token in order to + # handle funny room IDs in old pre-v12 rooms properly. We also truncate it + # to stay within the 255-character limit of opaque tokens. + next_batch = encode_base64(rooms[-1].encode("utf-8"), urlsafe=True)[:255] + # Due to the truncation, it is technically possible to have conflicting next + # batches by creating hundreds of rooms with the same 191 character prefix + # in the room ID. In the event that some silly user does that, don't let + # them paginate further. 
+ if next_batch == from_batch: + return 200, {"joined": rooms} + + return 200, {"joined": list(rooms), "next_batch": next_batch} def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: diff --git a/tests/rest/client/test_mutual_rooms.py b/tests/rest/client/test_mutual_rooms.py index ea063707aa..f78c67fcd9 100644 --- a/tests/rest/client/test_mutual_rooms.py +++ b/tests/rest/client/test_mutual_rooms.py @@ -55,12 +55,16 @@ class UserMutualRoomsTest(unittest.HomeserverTestCase): def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.store = hs.get_datastores().main + mutual_rooms.MUTUAL_ROOMS_BATCH_LIMIT = 10 - def _get_mutual_rooms(self, token: str, other_user: str) -> FakeChannel: + def _get_mutual_rooms( + self, token: str, other_user: str, since_token: str | None = None + ) -> FakeChannel: return self.make_request( "GET", "/_matrix/client/unstable/uk.half-shot.msc2666/user/mutual_rooms" - f"?user_id={quote(other_user)}", + f"?user_id={quote(other_user)}" + + (f"&from={quote(since_token)}" if since_token else ""), access_token=token, ) @@ -141,6 +145,52 @@ class UserMutualRoomsTest(unittest.HomeserverTestCase): for room_id_id in channel.json_body["joined"]: self.assertIn(room_id_id, [room_id_one, room_id_two]) + def _create_rooms_for_pagination_test( + self, count: int + ) -> tuple[str, str, list[str]]: + u1 = self.register_user("user1", "pass") + u1_token = self.login(u1, "pass") + u2 = self.register_user("user2", "pass") + u2_token = self.login(u2, "pass") + room_ids = [] + for i in range(count): + room_id = self.helper.create_room_as(u1, is_public=i % 2 == 0, tok=u1_token) + self.helper.invite(room_id, src=u1, targ=u2, tok=u1_token) + self.helper.join(room_id, user=u2, tok=u2_token) + room_ids.append(room_id) + room_ids.sort() + return u1_token, u2, room_ids + + def test_shared_room_list_pagination_two_pages(self) -> None: + u1_token, u2, room_ids = self._create_rooms_for_pagination_test(15) + + channel = 
self._get_mutual_rooms(u1_token, u2) + self.assertEqual(200, channel.code, channel.result) + self.assertEqual(channel.json_body["joined"], room_ids[0:10]) + self.assertIn("next_batch", channel.json_body) + + channel = self._get_mutual_rooms(u1_token, u2, channel.json_body["next_batch"]) + self.assertEqual(200, channel.code, channel.result) + self.assertEqual(channel.json_body["joined"], room_ids[10:20]) + self.assertNotIn("next_batch", channel.json_body) + + def test_shared_room_list_pagination_one_page(self) -> None: + u1_token, u2, room_ids = self._create_rooms_for_pagination_test(10) + + channel = self._get_mutual_rooms(u1_token, u2) + self.assertEqual(200, channel.code, channel.result) + self.assertEqual(channel.json_body["joined"], room_ids) + self.assertNotIn("next_batch", channel.json_body) + + def test_shared_room_list_pagination_invalid_token(self) -> None: + u1_token, u2, room_ids = self._create_rooms_for_pagination_test(10) + + channel = self._get_mutual_rooms(u1_token, u2, "!<>##faketoken") + self.assertEqual(400, channel.code, channel.result) + self.assertEqual( + "M_INVALID_PARAM", channel.json_body["errcode"], channel.result + ) + def test_shared_room_list_after_leave(self) -> None: """ A room should no longer be considered shared if the other @@ -172,3 +222,14 @@ class UserMutualRoomsTest(unittest.HomeserverTestCase): channel = self._get_mutual_rooms(u2_token, u1) self.assertEqual(200, channel.code, channel.result) self.assertEqual(len(channel.json_body["joined"]), 0) + + def test_shared_room_list_nonexistent_user(self) -> None: + u1 = self.register_user("user1", "pass") + u1_token = self.login(u1, "pass") + + # Check shared rooms from user1's perspective. 
+ # The other user does not exist, so there should be no rooms in common + channel = self._get_mutual_rooms(u1_token, "@meow:example.com") + self.assertEqual(200, channel.code, channel.result) + self.assertEqual(len(channel.json_body["joined"]), 0) + self.assertNotIn("next_batch", channel.json_body) diff --git a/tests/utils.py b/tests/utils.py index 4052c9a4fb..0cf97a7e8d 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -198,7 +198,9 @@ def default_config( "rc_invites": { "per_room": {"per_second": 10000, "burst_count": 10000}, "per_user": {"per_second": 10000, "burst_count": 10000}, + "per_issuer": {"per_second": 10000, "burst_count": 10000}, }, + "rc_room_creation": {"per_second": 10000, "burst_count": 10000}, "rc_3pid_validation": {"per_second": 10000, "burst_count": 10000}, "rc_presence": {"per_user": {"per_second": 10000, "burst_count": 10000}}, "saml2_enabled": False,