Merge branch 'develop' into travis/fix-quarantine-list

2025-12-17 21:50:46 -07:00
parent 516b74068b f4320b5a49
commit ca8d94585f
12 changed files with 205 additions and 41 deletions
--- a/changelog.d/19231.bugfix
+++ b/changelog.d/19231.bugfix
@@ -0,0 +1 @@
+Fix a bug where Mastodon posts (and possibly other embeds) have the wrong description for URL previews.
--- a/changelog.d/19279.feature
+++ b/changelog.d/19279.feature
@@ -0,0 +1 @@
+Implemented pagination for the [MSC2666](https://github.com/matrix-org/matrix-spec-proposals/pull/2666) mutual rooms endpoint. Contributed by @tulir @ Beeper.
--- a/changelog.d/19281.feature
+++ b/changelog.d/19281.feature
@@ -0,0 +1 @@
+Admin API: add worker support to `GET /_synapse/admin/v2/users/<user_id>`.
--- a/changelog.d/19300.feature
+++ b/changelog.d/19300.feature
@@ -0,0 +1 @@
+Improve proxy support for the `federation_client.py` dev script. Contributed by Denis Kasak (@dkasak).
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -255,6 +255,8 @@ information.
    ^/_matrix/client/(api/v1|r0|v3|unstable)/directory/room/.*$
    ^/_matrix/client/(r0|v3|unstable)/capabilities$
    ^/_matrix/client/(r0|v3|unstable)/notifications$
+
+    # Admin API requests
    ^/_synapse/admin/v1/rooms/[^/]+$

    # Encryption requests
@@ -300,6 +302,9 @@ Additionally, the following REST endpoints can be handled for GET requests:
    # Presence requests
    ^/_matrix/client/(api/v1|r0|v3|unstable)/presence/

+    # Admin API requests
+    ^/_synapse/admin/v2/users/[^/]+$
+
 Pagination requests can also be handled, but all requests for a given
 room must be routed to the same instance. Additionally, care must be taken to
 ensure that the purge history admin API is not used while pagination requests
--- a/scripts-dev/federation_client.py
+++ b/scripts-dev/federation_client.py
@@ -145,7 +145,7 @@ def request(
    print("Requesting %s" % dest, file=sys.stderr)

    s = requests.Session()
-    s.mount("matrix-federation://", MatrixConnectionAdapter())
+    s.mount("matrix-federation://", MatrixConnectionAdapter(verify_tls=verify_tls))

    headers: dict[str, str] = {
        "Authorization": authorization_headers[0],
@@ -267,6 +267,17 @@ def read_args_from_config(args: argparse.Namespace) -> None:


 class MatrixConnectionAdapter(HTTPAdapter):
+    """
+    A Matrix federation-aware HTTP Adapter.
+    """
+
+    verify_tls: bool
+    """whether to verify the remote server's TLS certificate."""
+
+    def __init__(self, verify_tls: bool = True) -> None:
+        self.verify_tls = verify_tls
+        super().__init__()
+
    def send(
        self,
        request: PreparedRequest,
@@ -280,7 +291,7 @@ class MatrixConnectionAdapter(HTTPAdapter):
        assert isinstance(request.url, str)
        parsed = urlparse.urlsplit(request.url)
        server_name = parsed.netloc
-        well_known = self._get_well_known(parsed.netloc)
+        well_known = self._get_well_known(parsed.netloc, verify_tls=self.verify_tls)

        if well_known:
            server_name = well_known
@@ -318,6 +329,21 @@ class MatrixConnectionAdapter(HTTPAdapter):
        print(
            f"Connecting to {host}:{port} with SNI {ssl_server_name}", file=sys.stderr
        )
+
+        if proxies:
+            scheme = parsed.scheme
+            if isinstance(scheme, bytes):
+                scheme = scheme.decode("utf-8")
+
+            proxy_for_scheme = proxies.get(scheme)
+            if proxy_for_scheme:
+                return self.proxy_manager_for(proxy_for_scheme).connection_from_host(
+                    host,
+                    port=port,
+                    scheme="https",
+                    pool_kwargs={"server_hostname": ssl_server_name},
+                )
+
        return self.poolmanager.connection_from_host(
            host,
            port=port,
@@ -368,7 +394,7 @@ class MatrixConnectionAdapter(HTTPAdapter):
            return server_name, 8448, server_name

    @staticmethod
-    def _get_well_known(server_name: str) -> str | None:
+    def _get_well_known(server_name: str, verify_tls: bool = True) -> str | None:
        if ":" in server_name:
            # explicit port, or ipv6 literal. Either way, no .well-known
            return None
@@ -379,7 +405,7 @@ class MatrixConnectionAdapter(HTTPAdapter):
        print(f"fetching {uri}", file=sys.stderr)

        try:
-            resp = requests.get(uri)
+            resp = requests.get(uri, verify=verify_tls)
            if resp.status_code != 200:
                print("%s gave %i" % (uri, resp.status_code), file=sys.stderr)
                return None
--- a/synapse/media/url_previewer.py
+++ b/synapse/media/url_previewer.py
@@ -331,10 +331,16 @@ class UrlPreviewer:
                # response failed or is incomplete.
                og_from_html = parse_html_to_open_graph(tree)

-                # Compile the Open Graph response by using the scraped
-                # information from the HTML and overlaying any information
-                # from the oEmbed response.
-                og = {**og_from_html, **og_from_oembed}
+                # Compile an Open Graph response by combining the oEmbed response
+                # and the information from the HTML, with information in the HTML
+                # preferred.
+                #
+                # The ordering here is intentional: certain websites (especially
+                # SPA JavaScript-based ones) including Mastodon and YouTube provide
+                # almost complete OpenGraph descriptions but only stubs for oEmbed,
+                # with further oEmbed information being populated with JavaScript,
+                # that Synapse won't execute.
+                og = og_from_oembed | og_from_html

                await self._precache_image_url(user, media_info, og)
            else:
--- a/synapse/rest/admin/init.py
+++ b/synapse/rest/admin/init.py
@@ -119,6 +119,7 @@ from synapse.rest.admin.users import (
    UserRegisterServlet,
    UserReplaceMasterCrossSigningKeyRestServlet,
    UserRestServletV2,
+    UserRestServletV2Get,
    UsersRestServletV2,
    UsersRestServletV3,
    UserTokenRestServlet,
@@ -281,6 +282,8 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
        # matrix_authentication_service integration uses the dedicated MAS API.
        if hs.config.experimental.msc3861.enabled:
            register_servlets_for_msc3861_delegation(hs, http_server)
+        else:
+            UserRestServletV2Get(hs).register(http_server)

        return

--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -210,7 +210,7 @@ class UsersRestServletV3(UsersRestServletV2):
        return parse_boolean(request, "deactivated")


-class UserRestServletV2(RestServlet):
+class UserRestServletV2Get(RestServlet):
    PATTERNS = admin_patterns("/users/(?P<user_id>[^/]*)$", "v2")

    """Get request to list user details.
@@ -220,22 +220,6 @@ class UserRestServletV2(RestServlet):

    returns:
        200 OK with user details if success otherwise an error.
-
-    Put request to allow an administrator to add or modify a user.
-    This needs user to have administrator access in Synapse.
-    We use PUT instead of POST since we already know the id of the user
-    object to create. POST could be used to create guests.
-
-    PUT /_synapse/admin/v2/users/<user_id>
-    {
-        "password": "secret",
-        "displayname": "User"
-    }
-
-    returns:
-        201 OK with new user object if user was created or
-        200 OK with modified user object if user was modified
-        otherwise an error.
    """

    def __init__(self, hs: "HomeServer"):
@@ -267,6 +251,28 @@ class UserRestServletV2(RestServlet):

        return HTTPStatus.OK, user_info_dict

+
+class UserRestServletV2(UserRestServletV2Get):
+    """
+    Put request to allow an administrator to add or modify a user.
+    This needs user to have administrator access in Synapse.
+    We use PUT instead of POST since we already know the id of the user
+    object to create. POST could be used to create guests.
+
+    Note: This inherits from `UserRestServletV2Get`, so also supports the `GET` route.
+
+    PUT /_synapse/admin/v2/users/<user_id>
+    {
+        "password": "secret",
+        "displayname": "User"
+    }
+
+    returns:
+        201 Created with new user object if user was created or
+        200 OK with modified user object if user was modified
+        otherwise an error.
+    """
+
    async def on_PUT(
        self, request: SynapseRequest, user_id: str
    ) -> tuple[int, JsonMapping]:
--- a/synapse/rest/client/mutual_rooms.py
+++ b/synapse/rest/client/mutual_rooms.py
@@ -19,9 +19,12 @@
 #
 #
 import logging
+from bisect import bisect
 from http import HTTPStatus
 from typing import TYPE_CHECKING

+from unpaddedbase64 import decode_base64, encode_base64
+
 from synapse.api.errors import Codes, SynapseError
 from synapse.http.server import HttpServer
 from synapse.http.servlet import RestServlet, parse_strings_from_args
@@ -35,10 +38,34 @@ if TYPE_CHECKING:

 logger = logging.getLogger(__name__)

+MUTUAL_ROOMS_BATCH_LIMIT = 100
+
+
+def _parse_mutual_rooms_batch_token_args(args: dict[bytes, list[bytes]]) -> str | None:
+    from_batches = parse_strings_from_args(args, "from")
+    if not from_batches:
+        return None
+    if len(from_batches) > 1:
+        raise SynapseError(
+            HTTPStatus.BAD_REQUEST,
+            "Duplicate from query parameter",
+            errcode=Codes.INVALID_PARAM,
+        )
+    if from_batches[0]:
+        try:
+            return decode_base64(from_batches[0]).decode("utf-8")
+        except Exception:
+            raise SynapseError(
+                HTTPStatus.BAD_REQUEST,
+                "Malformed from token",
+                errcode=Codes.INVALID_PARAM,
+            )
+    return None
+

 class UserMutualRoomsServlet(RestServlet):
    """
-    GET /uk.half-shot.msc2666/user/mutual_rooms?user_id={user_id} HTTP/1.1
+    GET /uk.half-shot.msc2666/user/mutual_rooms?user_id={user_id}&from={token} HTTP/1.1
    """

    PATTERNS = client_patterns(
@@ -56,6 +83,7 @@ class UserMutualRoomsServlet(RestServlet):
        args: dict[bytes, list[bytes]] = request.args  # type: ignore

        user_ids = parse_strings_from_args(args, "user_id", required=True)
+        from_batch = _parse_mutual_rooms_batch_token_args(args)

        if len(user_ids) > 1:
            raise SynapseError(
@@ -64,29 +92,52 @@ class UserMutualRoomsServlet(RestServlet):
                errcode=Codes.INVALID_PARAM,
            )

-        # We don't do batching, so a batch token is illegal by default
-        if b"batch_token" in args:
-            raise SynapseError(
-                HTTPStatus.BAD_REQUEST,
-                "Unknown batch_token",
-                errcode=Codes.INVALID_PARAM,
-            )
-
        user_id = user_ids[0]

        requester = await self.auth.get_user_by_req(request)
        if user_id == requester.user.to_string():
            raise SynapseError(
-                HTTPStatus.UNPROCESSABLE_ENTITY,
+                HTTPStatus.BAD_REQUEST,
                "You cannot request a list of shared rooms with yourself",
-                errcode=Codes.INVALID_PARAM,
+                errcode=Codes.UNKNOWN,
            )

-        rooms = await self.store.get_mutual_rooms_between_users(
-            frozenset((requester.user.to_string(), user_id))
+        # Sort here instead of the database function, so that we don't expose
+        # clients to any unrelated changes to the sorting algorithm.
+        rooms = sorted(
+            await self.store.get_mutual_rooms_between_users(
+                frozenset((requester.user.to_string(), user_id))
+            )
        )

-        return 200, {"joined": list(rooms)}
+        if from_batch:
+            # A from_batch token was provided, so cut off any rooms where the ID is
+            # lower than or equal to the token. This method doesn't care whether the
+            # provided token room still exists, nor whether it's even a real room ID.
+            #
+            # However, if rooms with a lower ID are added after the token was issued,
+            # they will not be included until the client makes a new request without a
+            # from token. This is considered acceptable, as clients generally won't
+            # persist these results for long periods.
+            rooms = rooms[bisect(rooms, from_batch) :]
+
+        if len(rooms) <= MUTUAL_ROOMS_BATCH_LIMIT:
+            # We've reached the end of the list, don't return a batch token
+            return 200, {"joined": rooms}
+
+        rooms = rooms[:MUTUAL_ROOMS_BATCH_LIMIT]
+        # We use urlsafe unpadded base64 encoding for the batch token in order to
+        # handle funny room IDs in old pre-v12 rooms properly. We also truncate it
+        # to stay within the 255-character limit of opaque tokens.
+        next_batch = encode_base64(rooms[-1].encode("utf-8"), urlsafe=True)[:255]
+        # Due to the truncation, it is technically possible to have conflicting next
+        # batches by creating hundreds of rooms with the same 191 character prefix
+        # in the room ID. In the event that some silly user does that, don't let
+        # them paginate further.
+        if next_batch == from_batch:
+            return 200, {"joined": rooms}
+
+        return 200, {"joined": list(rooms), "next_batch": next_batch}


 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
--- a/tests/rest/client/test_mutual_rooms.py
+++ b/tests/rest/client/test_mutual_rooms.py
@@ -55,12 +55,16 @@ class UserMutualRoomsTest(unittest.HomeserverTestCase):

    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
        self.store = hs.get_datastores().main
+        mutual_rooms.MUTUAL_ROOMS_BATCH_LIMIT = 10

-    def _get_mutual_rooms(self, token: str, other_user: str) -> FakeChannel:
+    def _get_mutual_rooms(
+        self, token: str, other_user: str, since_token: str | None = None
+    ) -> FakeChannel:
        return self.make_request(
            "GET",
            "/_matrix/client/unstable/uk.half-shot.msc2666/user/mutual_rooms"
-            f"?user_id={quote(other_user)}",
+            f"?user_id={quote(other_user)}"
+            + (f"&from={quote(since_token)}" if since_token else ""),
            access_token=token,
        )

@@ -141,6 +145,52 @@ class UserMutualRoomsTest(unittest.HomeserverTestCase):
        for room_id_id in channel.json_body["joined"]:
            self.assertIn(room_id_id, [room_id_one, room_id_two])

+    def _create_rooms_for_pagination_test(
+        self, count: int
+    ) -> tuple[str, str, list[str]]:
+        u1 = self.register_user("user1", "pass")
+        u1_token = self.login(u1, "pass")
+        u2 = self.register_user("user2", "pass")
+        u2_token = self.login(u2, "pass")
+        room_ids = []
+        for i in range(count):
+            room_id = self.helper.create_room_as(u1, is_public=i % 2 == 0, tok=u1_token)
+            self.helper.invite(room_id, src=u1, targ=u2, tok=u1_token)
+            self.helper.join(room_id, user=u2, tok=u2_token)
+            room_ids.append(room_id)
+        room_ids.sort()
+        return u1_token, u2, room_ids
+
+    def test_shared_room_list_pagination_two_pages(self) -> None:
+        u1_token, u2, room_ids = self._create_rooms_for_pagination_test(15)
+
+        channel = self._get_mutual_rooms(u1_token, u2)
+        self.assertEqual(200, channel.code, channel.result)
+        self.assertEqual(channel.json_body["joined"], room_ids[0:10])
+        self.assertIn("next_batch", channel.json_body)
+
+        channel = self._get_mutual_rooms(u1_token, u2, channel.json_body["next_batch"])
+        self.assertEqual(200, channel.code, channel.result)
+        self.assertEqual(channel.json_body["joined"], room_ids[10:20])
+        self.assertNotIn("next_batch", channel.json_body)
+
+    def test_shared_room_list_pagination_one_page(self) -> None:
+        u1_token, u2, room_ids = self._create_rooms_for_pagination_test(10)
+
+        channel = self._get_mutual_rooms(u1_token, u2)
+        self.assertEqual(200, channel.code, channel.result)
+        self.assertEqual(channel.json_body["joined"], room_ids)
+        self.assertNotIn("next_batch", channel.json_body)
+
+    def test_shared_room_list_pagination_invalid_token(self) -> None:
+        u1_token, u2, room_ids = self._create_rooms_for_pagination_test(10)
+
+        channel = self._get_mutual_rooms(u1_token, u2, "!<>##faketoken")
+        self.assertEqual(400, channel.code, channel.result)
+        self.assertEqual(
+            "M_INVALID_PARAM", channel.json_body["errcode"], channel.result
+        )
+
    def test_shared_room_list_after_leave(self) -> None:
        """
        A room should no longer be considered shared if the other
@@ -172,3 +222,14 @@ class UserMutualRoomsTest(unittest.HomeserverTestCase):
        channel = self._get_mutual_rooms(u2_token, u1)
        self.assertEqual(200, channel.code, channel.result)
        self.assertEqual(len(channel.json_body["joined"]), 0)
+
+    def test_shared_room_list_nonexistent_user(self) -> None:
+        u1 = self.register_user("user1", "pass")
+        u1_token = self.login(u1, "pass")
+
+        # Check shared rooms from user1's perspective.
+        # The other user does not exist, so there should be no rooms in common.
+        channel = self._get_mutual_rooms(u1_token, "@meow:example.com")
+        self.assertEqual(200, channel.code, channel.result)
+        self.assertEqual(len(channel.json_body["joined"]), 0)
+        self.assertNotIn("next_batch", channel.json_body)
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -198,7 +198,9 @@ def default_config(
        "rc_invites": {
            "per_room": {"per_second": 10000, "burst_count": 10000},
            "per_user": {"per_second": 10000, "burst_count": 10000},
+            "per_issuer": {"per_second": 10000, "burst_count": 10000},
        },
+        "rc_room_creation": {"per_second": 10000, "burst_count": 10000},
        "rc_3pid_validation": {"per_second": 10000, "burst_count": 10000},
        "rc_presence": {"per_user": {"per_second": 10000, "burst_count": 10000}},
        "saml2_enabled": False,
				`@@ -0,0 +1 @@`
				`Fix a bug where Mastodon posts (and possibly other embeds) have the wrong description for URL previews.`
				`@@ -0,0 +1 @@`
				`Implemented pagination for the [MSC2666](https://github.com/matrix-org/matrix-spec-proposals/pull/2666) mutual rooms endpoint. Contributed by @tulir @ Beeper.`
				`@@ -0,0 +1 @@`
				Admin API: add worker support to `GET /_synapse/admin/v2/users/<user_id>`.
				`@@ -0,0 +1 @@`
				Improve proxy support for the `federation_client.py` dev script. Contributed by Denis Kasak (@dkasak).