From 3c61ddbbc9ee313447c16fa7f19bdc29ce647a32 Mon Sep 17 00:00:00 2001 From: devonh Date: Mon, 24 Jun 2024 17:16:09 +0000 Subject: [PATCH 1/6] Add default values for rc_invites per_issuer to docs (#17347) A simple change to update the docs where default values were missing. ### Pull Request Checklist * [X] Pull request is based on the develop branch * [X] Pull request includes a [changelog file](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#changelog). The entry should: - Be a short description of your change which makes sense to users. "Fixed a bug that prevented receiving messages from other servers." instead of "Moved X method from `EventStore` to `EventWorkerStore`.". - Use markdown where necessary, mostly for `code blocks`. - End with either a period (.) or an exclamation mark (!). - Start with a capital letter. - Feel free to credit yourself, by adding a sentence "Contributed by @github_username." or "Contributed by [Your Name]." to the end of the entry. * [X] [Code style](https://element-hq.github.io/synapse/latest/code_style.html) is correct (run the [linters](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#run-the-linters)) --------- Co-authored-by: Kim Brose <2803622+HarHarLinks@users.noreply.github.com> Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> --- changelog.d/17347.doc | 1 + docs/usage/configuration/config_documentation.md | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 changelog.d/17347.doc diff --git a/changelog.d/17347.doc b/changelog.d/17347.doc new file mode 100644 index 0000000000..6cd41be60f --- /dev/null +++ b/changelog.d/17347.doc @@ -0,0 +1 @@ +Add default values for `rc_invites.per_issuer` to docs. diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 22c545359d..b3db078703 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1759,8 +1759,9 @@ rc_3pid_validation: ### `rc_invites` This option sets ratelimiting how often invites can be sent in a room or to a -specific user. `per_room` defaults to `per_second: 0.3`, `burst_count: 10` and -`per_user` defaults to `per_second: 0.003`, `burst_count: 5`. +specific user. `per_room` defaults to `per_second: 0.3`, `burst_count: 10`, +`per_user` defaults to `per_second: 0.003`, `burst_count: 5`, and `per_issuer` +defaults to `per_second: 0.3`, `burst_count: 10`. Client requests that invite user(s) when [creating a room](https://spec.matrix.org/v1.2/client-server-api/#post_matrixclientv3createroom) From 805e6c9a8f703a0a774321bd0755be63dcdcc807 Mon Sep 17 00:00:00 2001 From: devonh Date: Mon, 24 Jun 2024 17:18:58 +0000 Subject: [PATCH 2/6] Correct error in user_directory docs (#17348) ### Pull Request Checklist * [X] Pull request is based on the develop branch * [X] Pull request includes a [changelog file](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#changelog). The entry should: - Be a short description of your change which makes sense to users. "Fixed a bug that prevented receiving messages from other servers." instead of "Moved X method from `EventStore` to `EventWorkerStore`.". - Use markdown where necessary, mostly for `code blocks`. - End with either a period (.) or an exclamation mark (!). - Start with a capital letter. - Feel free to credit yourself, by adding a sentence "Contributed by @github_username." or "Contributed by [Your Name]." to the end of the entry. * [X] [Code style](https://element-hq.github.io/synapse/latest/code_style.html) is correct (run the [linters](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#run-the-linters)) --------- Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Co-authored-by: reivilibre --- changelog.d/17348.doc | 1 + docs/usage/configuration/config_documentation.md | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 changelog.d/17348.doc diff --git a/changelog.d/17348.doc b/changelog.d/17348.doc new file mode 100644 index 0000000000..4ce42bbadb --- /dev/null +++ b/changelog.d/17348.doc @@ -0,0 +1 @@ +Fix an error in the docs for `search_all_users` parameter under `user_directory`. diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index b3db078703..ba9f21cdee 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -3807,7 +3807,8 @@ This setting defines options related to the user directory. This option has the following sub-options: * `enabled`: Defines whether users can search the user directory. If false then empty responses are returned to all queries. Defaults to true. -* `search_all_users`: Defines whether to search all users visible to your HS at the time the search is performed. If set to true, will return all users who share a room with the user from the homeserver. +* `search_all_users`: Defines whether to search all users visible to your homeserver at the time the search is performed. + If set to true, will return all users known to the homeserver matching the search query. If false, search results will only contain users visible in public rooms and users sharing a room with the requester. Defaults to false. From 6e8af8319373e1ab470f1d8eee0420f3be84184f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 24 Jun 2024 19:07:56 -0500 Subject: [PATCH 3/6] Add `is_invite` filtering to Sliding Sync `/sync` (#17335) Based on [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575): Sliding Sync --- changelog.d/17335.feature | 1 + synapse/handlers/sliding_sync.py | 19 +++- tests/handlers/test_sliding_sync.py | 74 +++++++++++--- tests/rest/client/test_sync.py | 148 +++++++++++++++++++++++----- 4 files changed, 199 insertions(+), 43 deletions(-) create mode 100644 changelog.d/17335.feature diff --git a/changelog.d/17335.feature b/changelog.d/17335.feature new file mode 100644 index 0000000000..c6beed42ed --- /dev/null +++ b/changelog.d/17335.feature @@ -0,0 +1 @@ +Add `is_invite` filtering to experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 16d94925f5..847a638bba 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -554,7 +554,7 @@ class SlidingSyncHandler: # Flatten out the map dm_room_id_set = set() - if dm_map: + if isinstance(dm_map, dict): for room_ids in dm_map.values(): # Account data should be a list of room IDs. Ignore anything else if isinstance(room_ids, list): @@ -593,8 +593,21 @@ class SlidingSyncHandler: ): filtered_room_id_set.remove(room_id) - if filters.is_invite: - raise NotImplementedError() + # Filter for rooms that the user has been invited to + if filters.is_invite is not None: + # Make a copy so we don't run into an error: `Set changed size during + # iteration`, when we filter out and remove items + for room_id in list(filtered_room_id_set): + room_for_user = sync_room_map[room_id] + # If we're looking for invite rooms, filter out rooms that the user is + # not invited to and vice versa + if ( + filters.is_invite and room_for_user.membership != Membership.INVITE + ) or ( + not filters.is_invite + and room_for_user.membership == Membership.INVITE + ): + filtered_room_id_set.remove(room_id) if filters.room_types: raise NotImplementedError() diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index 0358239c7f..8dd4521b18 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -1200,11 +1200,7 @@ class FilterRoomsTestCase(HomeserverTestCase): user2_tok = self.login(user2_id, "pass") # Create a normal room - room_id = self.helper.create_room_as( - user1_id, - is_public=False, - tok=user1_tok, - ) + room_id = self.helper.create_room_as(user1_id, tok=user1_tok) # Create a DM room dm_room_id = self._create_dm_room( @@ -1261,18 +1257,10 @@ class FilterRoomsTestCase(HomeserverTestCase): user1_tok = self.login(user1_id, "pass") # Create a normal room - room_id = self.helper.create_room_as( - user1_id, - is_public=False, - tok=user1_tok, - ) + room_id = self.helper.create_room_as(user1_id, tok=user1_tok) # Create an encrypted room - encrypted_room_id = self.helper.create_room_as( - user1_id, - is_public=False, - tok=user1_tok, - ) + encrypted_room_id = self.helper.create_room_as(user1_id, tok=user1_tok) self.helper.send_state( encrypted_room_id, EventTypes.RoomEncryption, @@ -1319,6 +1307,62 @@ class FilterRoomsTestCase(HomeserverTestCase): self.assertEqual(falsy_filtered_room_map.keys(), {room_id}) + def test_filter_invite_rooms(self) -> None: + """ + Test `filter.is_invite` for rooms that the user has been invited to + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + # Create a normal room + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id, user1_id, tok=user1_tok) + + # Create a room that user1 is invited to + invite_room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.invite(invite_room_id, src=user2_id, targ=user1_id, tok=user2_tok) + + after_rooms_token = self.event_sources.get_current_token() + + # Get the rooms the user should be syncing with + sync_room_map = self.get_success( + self.sliding_sync_handler.get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=None, + to_token=after_rooms_token, + ) + ) + + # Try with `is_invite=True` + truthy_filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters( + is_invite=True, + ), + after_rooms_token, + ) + ) + + self.assertEqual(truthy_filtered_room_map.keys(), {invite_room_id}) + + # Try with `is_invite=False` + falsy_filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters( + is_invite=False, + ), + after_rooms_token, + ) + ) + + self.assertEqual(falsy_filtered_room_map.keys(), {room_id}) + class SortRoomsTestCase(HomeserverTestCase): """ diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 5195659ec2..bfb26139d3 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -19,7 +19,8 @@ # # import json -from typing import List +import logging +from typing import Dict, List from parameterized import parameterized, parameterized_class @@ -44,6 +45,8 @@ from tests.federation.transport.test_knocking import ( ) from tests.server import TimedOutException +logger = logging.getLogger(__name__) + class FilterTestCase(unittest.HomeserverTestCase): user_id = "@apple:test" @@ -1234,12 +1237,58 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): self.store = hs.get_datastores().main self.event_sources = hs.get_event_sources() + def _add_new_dm_to_global_account_data( + self, source_user_id: str, target_user_id: str, target_room_id: str + ) -> None: + """ + Helper to handle inserting a new DM for the source user into global account data + (handles all of the list merging). + + Args: + source_user_id: The user ID of the DM mapping we're going to update + target_user_id: User ID of the person the DM is with + target_room_id: Room ID of the DM + """ + + # Get the current DM map + existing_dm_map = self.get_success( + self.store.get_global_account_data_by_type_for_user( + source_user_id, AccountDataTypes.DIRECT + ) + ) + # Scrutinize the account data since it has no concrete type. We're just copying + # everything into a known type. It should be a mapping from user ID to a list of + # room IDs. Ignore anything else. + new_dm_map: Dict[str, List[str]] = {} + if isinstance(existing_dm_map, dict): + for user_id, room_ids in existing_dm_map.items(): + if isinstance(user_id, str) and isinstance(room_ids, list): + for room_id in room_ids: + if isinstance(room_id, str): + new_dm_map[user_id] = new_dm_map.get(user_id, []) + [ + room_id + ] + + # Add the new DM to the map + new_dm_map[target_user_id] = new_dm_map.get(target_user_id, []) + [ + target_room_id + ] + # Save the DM map to global account data + self.get_success( + self.store.add_account_data_for_user( + source_user_id, + AccountDataTypes.DIRECT, + new_dm_map, + ) + ) + def _create_dm_room( self, inviter_user_id: str, inviter_tok: str, invitee_user_id: str, invitee_tok: str, + should_join_room: bool = True, ) -> str: """ Helper to create a DM room as the "inviter" and invite the "invitee" user to the @@ -1260,24 +1309,17 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): tok=inviter_tok, extra_data={"is_direct": True}, ) - # Person that was invited joins the room - self.helper.join(room_id, invitee_user_id, tok=invitee_tok) + if should_join_room: + # Person that was invited joins the room + self.helper.join(room_id, invitee_user_id, tok=invitee_tok) # Mimic the client setting the room as a direct message in the global account - # data - self.get_success( - self.store.add_account_data_for_user( - invitee_user_id, - AccountDataTypes.DIRECT, - {inviter_user_id: [room_id]}, - ) + # data for both users. + self._add_new_dm_to_global_account_data( + invitee_user_id, inviter_user_id, room_id ) - self.get_success( - self.store.add_account_data_for_user( - inviter_user_id, - AccountDataTypes.DIRECT, - {invitee_user_id: [room_id]}, - ) + self._add_new_dm_to_global_account_data( + inviter_user_id, invitee_user_id, room_id ) return room_id @@ -1397,15 +1439,28 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): user2_tok = self.login(user2_id, "pass") # Create a DM room - dm_room_id = self._create_dm_room( + joined_dm_room_id = self._create_dm_room( inviter_user_id=user1_id, inviter_tok=user1_tok, invitee_user_id=user2_id, invitee_tok=user2_tok, + should_join_room=True, + ) + invited_dm_room_id = self._create_dm_room( + inviter_user_id=user1_id, + inviter_tok=user1_tok, + invitee_user_id=user2_id, + invitee_tok=user2_tok, + should_join_room=False, ) # Create a normal room - room_id = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) + room_id = self.helper.create_room_as(user1_id, tok=user2_tok) + self.helper.join(room_id, user1_id, tok=user1_tok) + + # Create a room that user1 is invited to + invite_room_id = self.helper.create_room_as(user1_id, tok=user2_tok) + self.helper.invite(invite_room_id, src=user2_id, targ=user1_id, tok=user2_tok) # Make the Sliding Sync request channel = self.make_request( @@ -1413,18 +1468,34 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): self.sync_endpoint, { "lists": { + # Absense of filters does not imply "False" values + "all": { + "ranges": [[0, 99]], + "required_state": [], + "timeline_limit": 1, + "filters": {}, + }, + # Test single truthy filter "dms": { "ranges": [[0, 99]], "required_state": [], "timeline_limit": 1, "filters": {"is_dm": True}, }, - "foo-list": { + # Test single falsy filter + "non-dms": { "ranges": [[0, 99]], "required_state": [], "timeline_limit": 1, "filters": {"is_dm": False}, }, + # Test how multiple filters should stack (AND'd together) + "room-invites": { + "ranges": [[0, 99]], + "required_state": [], + "timeline_limit": 1, + "filters": {"is_dm": False, "is_invite": True}, + }, } }, access_token=user1_tok, @@ -1434,32 +1505,59 @@ class SlidingSyncTestCase(unittest.HomeserverTestCase): # Make sure it has the foo-list we requested self.assertListEqual( list(channel.json_body["lists"].keys()), - ["dms", "foo-list"], + ["all", "dms", "non-dms", "room-invites"], channel.json_body["lists"].keys(), ) - # Make sure the list includes the room we are joined to + # Make sure the lists have the correct rooms + self.assertListEqual( + list(channel.json_body["lists"]["all"]["ops"]), + [ + { + "op": "SYNC", + "range": [0, 99], + "room_ids": [ + invite_room_id, + room_id, + invited_dm_room_id, + joined_dm_room_id, + ], + } + ], + list(channel.json_body["lists"]["all"]), + ) self.assertListEqual( list(channel.json_body["lists"]["dms"]["ops"]), [ { "op": "SYNC", "range": [0, 99], - "room_ids": [dm_room_id], + "room_ids": [invited_dm_room_id, joined_dm_room_id], } ], list(channel.json_body["lists"]["dms"]), ) self.assertListEqual( - list(channel.json_body["lists"]["foo-list"]["ops"]), + list(channel.json_body["lists"]["non-dms"]["ops"]), [ { "op": "SYNC", "range": [0, 99], - "room_ids": [room_id], + "room_ids": [invite_room_id, room_id], } ], - list(channel.json_body["lists"]["foo-list"]), + list(channel.json_body["lists"]["non-dms"]), + ) + self.assertListEqual( + list(channel.json_body["lists"]["room-invites"]["ops"]), + [ + { + "op": "SYNC", + "range": [0, 99], + "room_ids": [invite_room_id], + } + ], + list(channel.json_body["lists"]["room-invites"]), ) def test_sort_list(self) -> None: From a98cb87bee18c9028d03676ce544860239e1ff34 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 25 Jun 2024 09:57:34 +0100 Subject: [PATCH 4/6] Revert "Reduce device lists replication traffic." (#17360) Reverts element-hq/synapse#17333 It looks like master was still sending out replication RDATA with the old format... somehow --- changelog.d/17333.misc | 1 - synapse/replication/tcp/client.py | 19 ++-- synapse/replication/tcp/streams/_base.py | 12 +-- synapse/storage/databases/main/devices.py | 93 +++++++------------ .../storage/databases/main/end_to_end_keys.py | 4 +- tests/storage/test_devices.py | 8 -- 6 files changed, 48 insertions(+), 89 deletions(-) delete mode 100644 changelog.d/17333.misc diff --git a/changelog.d/17333.misc b/changelog.d/17333.misc deleted file mode 100644 index d3ef0b3777..0000000000 --- a/changelog.d/17333.misc +++ /dev/null @@ -1 +0,0 @@ -Handle device lists notifications for large accounts more efficiently in worker mode. diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 3dddbb70b4..2d6d49eed7 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -114,19 +114,13 @@ class ReplicationDataHandler: """ all_room_ids: Set[str] = set() if stream_name == DeviceListsStream.NAME: - if any(not row.is_signature and not row.hosts_calculated for row in rows): + if any(row.entity.startswith("@") and not row.is_signature for row in rows): prev_token = self.store.get_device_stream_token() all_room_ids = await self.store.get_all_device_list_changes( prev_token, token ) self.store.device_lists_in_rooms_have_changed(all_room_ids, token) - # If we're sending federation we need to update the device lists - # outbound pokes stream change cache with updated hosts. - if self.send_handler and any(row.hosts_calculated for row in rows): - hosts = await self.store.get_destinations_for_device(token) - self.store.device_lists_outbound_pokes_have_changed(hosts, token) - self.store.process_replication_rows(stream_name, instance_name, token, rows) # NOTE: this must be called after process_replication_rows to ensure any # cache invalidations are first handled before any stream ID advances. @@ -439,11 +433,12 @@ class FederationSenderHandler: # The entities are either user IDs (starting with '@') whose devices # have changed, or remote servers that we need to tell about # changes. - if any(row.hosts_calculated for row in rows): - hosts = await self.store.get_destinations_for_device(token) - await self.federation_sender.send_device_messages( - hosts, immediate=False - ) + hosts = { + row.entity + for row in rows + if not row.entity.startswith("@") and not row.is_signature + } + await self.federation_sender.send_device_messages(hosts, immediate=False) elif stream_name == ToDeviceStream.NAME: # The to_device stream includes stuff to be pushed to both local diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py index d021904de7..661206c841 100644 --- a/synapse/replication/tcp/streams/_base.py +++ b/synapse/replication/tcp/streams/_base.py @@ -549,14 +549,10 @@ class DeviceListsStream(_StreamFromIdGen): @attr.s(slots=True, frozen=True, auto_attribs=True) class DeviceListsStreamRow: - user_id: str + entity: str # Indicates that a user has signed their own device with their user-signing key is_signature: bool - # Indicates if this is a notification that we've calculated the hosts we - # need to send the update to. - hosts_calculated: bool - NAME = "device_lists" ROW_TYPE = DeviceListsStreamRow @@ -598,13 +594,13 @@ class DeviceListsStream(_StreamFromIdGen): upper_limit_token = min(upper_limit_token, signatures_to_token) device_updates = [ - (stream_id, (entity, False, hosts)) - for stream_id, (entity, hosts) in device_updates + (stream_id, (entity, False)) + for stream_id, (entity,) in device_updates if stream_id <= upper_limit_token ] signatures_updates = [ - (stream_id, (entity, True, False)) + (stream_id, (entity, True)) for stream_id, (entity,) in signatures_updates if stream_id <= upper_limit_token ] diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 5eeca6165d..40187496e2 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -164,24 +164,22 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): prefilled_cache=user_signature_stream_prefill, ) - self._device_list_federation_stream_cache = None - if hs.should_send_federation(): - ( - device_list_federation_prefill, - device_list_federation_list_id, - ) = self.db_pool.get_cache_dict( - db_conn, - "device_lists_outbound_pokes", - entity_column="destination", - stream_column="stream_id", - max_value=device_list_max, - limit=10000, - ) - self._device_list_federation_stream_cache = StreamChangeCache( - "DeviceListFederationStreamChangeCache", - device_list_federation_list_id, - prefilled_cache=device_list_federation_prefill, - ) + ( + device_list_federation_prefill, + device_list_federation_list_id, + ) = self.db_pool.get_cache_dict( + db_conn, + "device_lists_outbound_pokes", + entity_column="destination", + stream_column="stream_id", + max_value=device_list_max, + limit=10000, + ) + self._device_list_federation_stream_cache = StreamChangeCache( + "DeviceListFederationStreamChangeCache", + device_list_federation_list_id, + prefilled_cache=device_list_federation_prefill, + ) if hs.config.worker.run_background_tasks: self._clock.looping_call( @@ -209,30 +207,23 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): ) -> None: for row in rows: if row.is_signature: - self._user_signature_stream_cache.entity_has_changed(row.user_id, token) + self._user_signature_stream_cache.entity_has_changed(row.entity, token) continue # The entities are either user IDs (starting with '@') whose devices # have changed, or remote servers that we need to tell about # changes. - if not row.hosts_calculated: - self._device_list_stream_cache.entity_has_changed(row.user_id, token) - self.get_cached_devices_for_user.invalidate((row.user_id,)) - self._get_cached_user_device.invalidate((row.user_id,)) - self.get_device_list_last_stream_id_for_remote.invalidate( - (row.user_id,) + if row.entity.startswith("@"): + self._device_list_stream_cache.entity_has_changed(row.entity, token) + self.get_cached_devices_for_user.invalidate((row.entity,)) + self._get_cached_user_device.invalidate((row.entity,)) + self.get_device_list_last_stream_id_for_remote.invalidate((row.entity,)) + + else: + self._device_list_federation_stream_cache.entity_has_changed( + row.entity, token ) - def device_lists_outbound_pokes_have_changed( - self, destinations: StrCollection, token: int - ) -> None: - assert self._device_list_federation_stream_cache is not None - - for destination in destinations: - self._device_list_federation_stream_cache.entity_has_changed( - destination, token - ) - def device_lists_in_rooms_have_changed( self, room_ids: StrCollection, token: int ) -> None: @@ -372,11 +363,6 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): EDU contents. """ now_stream_id = self.get_device_stream_token() - if from_stream_id == now_stream_id: - return now_stream_id, [] - - if self._device_list_federation_stream_cache is None: - raise Exception("Func can only be used on federation senders") has_changed = self._device_list_federation_stream_cache.has_entity_changed( destination, int(from_stream_id) @@ -1032,10 +1018,10 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): # This query Does The Right Thing where it'll correctly apply the # bounds to the inner queries. sql = """ - SELECT stream_id, user_id, hosts FROM ( - SELECT stream_id, user_id, false AS hosts FROM device_lists_stream + SELECT stream_id, entity FROM ( + SELECT stream_id, user_id AS entity FROM device_lists_stream UNION ALL - SELECT DISTINCT stream_id, user_id, true AS hosts FROM device_lists_outbound_pokes + SELECT stream_id, destination AS entity FROM device_lists_outbound_pokes ) AS e WHERE ? < stream_id AND stream_id <= ? ORDER BY stream_id ASC @@ -1591,14 +1577,6 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): get_device_list_changes_in_room_txn, ) - async def get_destinations_for_device(self, stream_id: int) -> StrCollection: - return await self.db_pool.simple_select_onecol( - table="device_lists_outbound_pokes", - keyvalues={"stream_id": stream_id}, - retcol="destination", - desc="get_destinations_for_device", - ) - class DeviceBackgroundUpdateStore(SQLBaseStore): def __init__( @@ -2134,13 +2112,12 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): stream_ids: List[int], context: Optional[Dict[str, str]], ) -> None: - if self._device_list_federation_stream_cache: - for host in hosts: - txn.call_after( - self._device_list_federation_stream_cache.entity_has_changed, - host, - stream_ids[-1], - ) + for host in hosts: + txn.call_after( + self._device_list_federation_stream_cache.entity_has_changed, + host, + stream_ids[-1], + ) now = self._clock.time_msec() stream_id_iterator = iter(stream_ids) diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index 9e6c9561ae..38d8785faa 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -123,9 +123,9 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker if stream_name == DeviceListsStream.NAME: for row in rows: assert isinstance(row, DeviceListsStream.DeviceListsStreamRow) - if not row.hosts_calculated: + if row.entity.startswith("@"): self._get_e2e_device_keys_for_federation_query_inner.invalidate( - (row.user_id,) + (row.entity,) ) super().process_replication_rows(stream_name, instance_name, token, rows) diff --git a/tests/storage/test_devices.py b/tests/storage/test_devices.py index ba01b038ab..7f975d04ff 100644 --- a/tests/storage/test_devices.py +++ b/tests/storage/test_devices.py @@ -36,14 +36,6 @@ class DeviceStoreTestCase(HomeserverTestCase): def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.store = hs.get_datastores().main - def default_config(self) -> JsonDict: - config = super().default_config() - - # We 'enable' federation otherwise `get_device_updates_by_remote` will - # throw an exception. - config["federation_sender_instances"] = ["master"] - return config - def add_device_change(self, user_id: str, device_ids: List[str], host: str) -> None: """Add a device list change for the given device to `device_lists_outbound_pokes` table. From 554a92601a4bf61f9076adfffb613a2c19871446 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 25 Jun 2024 10:34:34 +0100 Subject: [PATCH 5/6] Reintroduce "Reduce device lists replication traffic."" (#17361) Reintroduces https://github.com/element-hq/synapse/pull/17333 Turns out the reason for revert was down two master instances running --- changelog.d/17333.misc | 1 + synapse/replication/tcp/client.py | 19 ++-- synapse/replication/tcp/streams/_base.py | 12 ++- synapse/storage/databases/main/devices.py | 93 ++++++++++++------- .../storage/databases/main/end_to_end_keys.py | 4 +- tests/storage/test_devices.py | 8 ++ 6 files changed, 89 insertions(+), 48 deletions(-) create mode 100644 changelog.d/17333.misc diff --git a/changelog.d/17333.misc b/changelog.d/17333.misc new file mode 100644 index 0000000000..d3ef0b3777 --- /dev/null +++ b/changelog.d/17333.misc @@ -0,0 +1 @@ +Handle device lists notifications for large accounts more efficiently in worker mode. diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 2d6d49eed7..3dddbb70b4 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -114,13 +114,19 @@ class ReplicationDataHandler: """ all_room_ids: Set[str] = set() if stream_name == DeviceListsStream.NAME: - if any(row.entity.startswith("@") and not row.is_signature for row in rows): + if any(not row.is_signature and not row.hosts_calculated for row in rows): prev_token = self.store.get_device_stream_token() all_room_ids = await self.store.get_all_device_list_changes( prev_token, token ) self.store.device_lists_in_rooms_have_changed(all_room_ids, token) + # If we're sending federation we need to update the device lists + # outbound pokes stream change cache with updated hosts. + if self.send_handler and any(row.hosts_calculated for row in rows): + hosts = await self.store.get_destinations_for_device(token) + self.store.device_lists_outbound_pokes_have_changed(hosts, token) + self.store.process_replication_rows(stream_name, instance_name, token, rows) # NOTE: this must be called after process_replication_rows to ensure any # cache invalidations are first handled before any stream ID advances. @@ -433,12 +439,11 @@ class FederationSenderHandler: # The entities are either user IDs (starting with '@') whose devices # have changed, or remote servers that we need to tell about # changes. - hosts = { - row.entity - for row in rows - if not row.entity.startswith("@") and not row.is_signature - } - await self.federation_sender.send_device_messages(hosts, immediate=False) + if any(row.hosts_calculated for row in rows): + hosts = await self.store.get_destinations_for_device(token) + await self.federation_sender.send_device_messages( + hosts, immediate=False + ) elif stream_name == ToDeviceStream.NAME: # The to_device stream includes stuff to be pushed to both local diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py index 661206c841..d021904de7 100644 --- a/synapse/replication/tcp/streams/_base.py +++ b/synapse/replication/tcp/streams/_base.py @@ -549,10 +549,14 @@ class DeviceListsStream(_StreamFromIdGen): @attr.s(slots=True, frozen=True, auto_attribs=True) class DeviceListsStreamRow: - entity: str + user_id: str # Indicates that a user has signed their own device with their user-signing key is_signature: bool + # Indicates if this is a notification that we've calculated the hosts we + # need to send the update to. + hosts_calculated: bool + NAME = "device_lists" ROW_TYPE = DeviceListsStreamRow @@ -594,13 +598,13 @@ class DeviceListsStream(_StreamFromIdGen): upper_limit_token = min(upper_limit_token, signatures_to_token) device_updates = [ - (stream_id, (entity, False)) - for stream_id, (entity,) in device_updates + (stream_id, (entity, False, hosts)) + for stream_id, (entity, hosts) in device_updates if stream_id <= upper_limit_token ] signatures_updates = [ - (stream_id, (entity, True)) + (stream_id, (entity, True, False)) for stream_id, (entity,) in signatures_updates if stream_id <= upper_limit_token ] diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 40187496e2..5eeca6165d 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -164,22 +164,24 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): prefilled_cache=user_signature_stream_prefill, ) - ( - device_list_federation_prefill, - device_list_federation_list_id, - ) = self.db_pool.get_cache_dict( - db_conn, - "device_lists_outbound_pokes", - entity_column="destination", - stream_column="stream_id", - max_value=device_list_max, - limit=10000, - ) - self._device_list_federation_stream_cache = StreamChangeCache( - "DeviceListFederationStreamChangeCache", - device_list_federation_list_id, - prefilled_cache=device_list_federation_prefill, - ) + self._device_list_federation_stream_cache = None + if hs.should_send_federation(): + ( + device_list_federation_prefill, + device_list_federation_list_id, + ) = self.db_pool.get_cache_dict( + db_conn, + "device_lists_outbound_pokes", + entity_column="destination", + stream_column="stream_id", + max_value=device_list_max, + limit=10000, + ) + self._device_list_federation_stream_cache = StreamChangeCache( + "DeviceListFederationStreamChangeCache", + device_list_federation_list_id, + prefilled_cache=device_list_federation_prefill, + ) if hs.config.worker.run_background_tasks: self._clock.looping_call( @@ -207,23 +209,30 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): ) -> None: for row in rows: if row.is_signature: - self._user_signature_stream_cache.entity_has_changed(row.entity, token) + self._user_signature_stream_cache.entity_has_changed(row.user_id, token) continue # The entities are either user IDs (starting with '@') whose devices # have changed, or remote servers that we need to tell about # changes. - if row.entity.startswith("@"): - self._device_list_stream_cache.entity_has_changed(row.entity, token) - self.get_cached_devices_for_user.invalidate((row.entity,)) - self._get_cached_user_device.invalidate((row.entity,)) - self.get_device_list_last_stream_id_for_remote.invalidate((row.entity,)) - - else: - self._device_list_federation_stream_cache.entity_has_changed( - row.entity, token + if not row.hosts_calculated: + self._device_list_stream_cache.entity_has_changed(row.user_id, token) + self.get_cached_devices_for_user.invalidate((row.user_id,)) + self._get_cached_user_device.invalidate((row.user_id,)) + self.get_device_list_last_stream_id_for_remote.invalidate( + (row.user_id,) ) + def device_lists_outbound_pokes_have_changed( + self, destinations: StrCollection, token: int + ) -> None: + assert self._device_list_federation_stream_cache is not None + + for destination in destinations: + self._device_list_federation_stream_cache.entity_has_changed( + destination, token + ) + def device_lists_in_rooms_have_changed( self, room_ids: StrCollection, token: int ) -> None: @@ -363,6 +372,11 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): EDU contents. """ now_stream_id = self.get_device_stream_token() + if from_stream_id == now_stream_id: + return now_stream_id, [] + + if self._device_list_federation_stream_cache is None: + raise Exception("Func can only be used on federation senders") has_changed = self._device_list_federation_stream_cache.has_entity_changed( destination, int(from_stream_id) @@ -1018,10 +1032,10 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): # This query Does The Right Thing where it'll correctly apply the # bounds to the inner queries. sql = """ - SELECT stream_id, entity FROM ( - SELECT stream_id, user_id AS entity FROM device_lists_stream + SELECT stream_id, user_id, hosts FROM ( + SELECT stream_id, user_id, false AS hosts FROM device_lists_stream UNION ALL - SELECT stream_id, destination AS entity FROM device_lists_outbound_pokes + SELECT DISTINCT stream_id, user_id, true AS hosts FROM device_lists_outbound_pokes ) AS e WHERE ? < stream_id AND stream_id <= ? ORDER BY stream_id ASC @@ -1577,6 +1591,14 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): get_device_list_changes_in_room_txn, ) + async def get_destinations_for_device(self, stream_id: int) -> StrCollection: + return await self.db_pool.simple_select_onecol( + table="device_lists_outbound_pokes", + keyvalues={"stream_id": stream_id}, + retcol="destination", + desc="get_destinations_for_device", + ) + class DeviceBackgroundUpdateStore(SQLBaseStore): def __init__( @@ -2112,12 +2134,13 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): stream_ids: List[int], context: Optional[Dict[str, str]], ) -> None: - for host in hosts: - txn.call_after( - self._device_list_federation_stream_cache.entity_has_changed, - host, - stream_ids[-1], - ) + if self._device_list_federation_stream_cache: + for host in hosts: + txn.call_after( + self._device_list_federation_stream_cache.entity_has_changed, + host, + stream_ids[-1], + ) now = self._clock.time_msec() stream_id_iterator = iter(stream_ids) diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index 38d8785faa..9e6c9561ae 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -123,9 +123,9 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker if stream_name == DeviceListsStream.NAME: for row in rows: assert isinstance(row, DeviceListsStream.DeviceListsStreamRow) - if row.entity.startswith("@"): + if not row.hosts_calculated: self._get_e2e_device_keys_for_federation_query_inner.invalidate( - (row.entity,) + (row.user_id,) ) super().process_replication_rows(stream_name, instance_name, token, rows) diff --git a/tests/storage/test_devices.py b/tests/storage/test_devices.py index 7f975d04ff..ba01b038ab 100644 --- a/tests/storage/test_devices.py +++ b/tests/storage/test_devices.py @@ -36,6 +36,14 @@ class DeviceStoreTestCase(HomeserverTestCase): def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.store = hs.get_datastores().main + def default_config(self) -> JsonDict: + config = super().default_config() + + # We 'enable' federation otherwise `get_device_updates_by_remote` will + # throw an exception. + config["federation_sender_instances"] = ["master"] + return config + def add_device_change(self, user_id: str, device_ids: List[str], host: str) -> None: """Add a device list change for the given device to `device_lists_outbound_pokes` table. From c89fea3fd1f47b43c4d500dd7d024b2f9b24d2ad Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 25 Jun 2024 11:17:39 +0100 Subject: [PATCH 6/6] Limit amount of replication we send (#17358) Fixes up #17333, where we failed to actually send less data (the `DISTINCT` didn't work due to `stream_id` being different). We fix this by making it so that every device list outbound poke for a given user ID has the same stream ID. We can't change the query to only return e.g. max stream ID as the receivers look up the destinations to send to by doing `SELECT WHERE stream_id = ?` --- changelog.d/17358.misc | 1 + synapse/storage/databases/main/devices.py | 15 +++++++-------- 2 files changed, 8 insertions(+), 8 deletions(-) create mode 100644 changelog.d/17358.misc diff --git a/changelog.d/17358.misc b/changelog.d/17358.misc new file mode 100644 index 0000000000..d3ef0b3777 --- /dev/null +++ b/changelog.d/17358.misc @@ -0,0 +1 @@ +Handle device lists notifications for large accounts more efficiently in worker mode. diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 5eeca6165d..59a035dd62 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -2131,7 +2131,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): user_id: str, device_id: str, hosts: Collection[str], - stream_ids: List[int], + stream_id: int, context: Optional[Dict[str, str]], ) -> None: if self._device_list_federation_stream_cache: @@ -2139,11 +2139,10 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): txn.call_after( self._device_list_federation_stream_cache.entity_has_changed, host, - stream_ids[-1], + stream_id, ) now = self._clock.time_msec() - stream_id_iterator = iter(stream_ids) encoded_context = json_encoder.encode(context) mark_sent = not self.hs.is_mine_id(user_id) @@ -2152,7 +2151,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): ( destination, self._instance_name, - next(stream_id_iterator), + stream_id, user_id, device_id, mark_sent, @@ -2337,22 +2336,22 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): return def add_device_list_outbound_pokes_txn( - txn: LoggingTransaction, stream_ids: List[int] + txn: LoggingTransaction, stream_id: int ) -> None: self._add_device_outbound_poke_to_stream_txn( txn, user_id=user_id, device_id=device_id, hosts=hosts, - stream_ids=stream_ids, + stream_id=stream_id, context=context, ) - async with self._device_list_id_gen.get_next_mult(len(hosts)) as stream_ids: + async with self._device_list_id_gen.get_next() as stream_id: return await self.db_pool.runInteraction( "add_device_list_outbound_pokes", add_device_list_outbound_pokes_txn, - stream_ids, + stream_id, ) async def add_remote_device_list_to_pending(