Compare commits

...

8 Commits

Author SHA1 Message Date
Sean Quah
d40ccbe86f fixup: add comment 2022-09-13 16:18:43 +01:00
Sean Quah
b234f4d9f8 Merge branch 'develop' into squah/fix_remote_user_leave_device_list_tracking 2022-09-09 17:08:29 +01:00
Sean Quah
d9287047dc Update synapse/handlers/e2e_keys.py 2022-09-09 14:26:33 +01:00
Sean Quah
5a12cb7346 Add newsfile 2022-09-08 15:53:50 +01:00
Sean Quah
10595c87a3 Fix insufficient mocking in test 2022-09-08 15:53:50 +01:00
Sean Quah
17dd4419a7 Check whether we still share a room when using cached device lists
Signed-off-by: Sean Quah <seanq@matrix.org>
2022-09-08 14:01:58 +01:00
Sean Quah
766b136db3 Remove redundant mark_remote_user_device_list_as_unsubscribed call
Now that we are handling device list unsubscriptions in the event
persistence code, it's no longer necessary to mark device lists as
unsubscribed elsewhere.
2022-09-08 14:01:46 +01:00
Sean Quah
18e5f60756 Fix bug in device list caching when remote users leave rooms
When a remote user leaves the last room shared with the homeserver, we
have to mark their device list as unsubscribed, otherwise we will hold
on to a stale device list in our cache. Crucially, the device list will
remain cached even after the remote user rejoins the room, which can
lead to E2EE failures until the remote user's device list next changes.

Signed-off-by: Sean Quah <seanq@matrix.org>
2022-09-08 14:01:39 +01:00
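To make the failure mode described in the commit messages above concrete, here is a rough, hypothetical sketch (not Synapse code; all names are made up) of the invariant the fix enforces: a cached remote device list is only trustworthy while the homeserver still shares at least one room with that user, because federation only delivers device list updates for users we share a room with. The bug was that the cached entry survived the user leaving and later rejoining the last shared room, so stale devices kept being returned.

from typing import Dict, List, Optional, Set


class ToyDeviceListCache:
    """Illustrative only: caches remote users' device lists."""

    def __init__(self) -> None:
        self._devices: Dict[str, List[str]] = {}  # user_id -> cached device IDs
        self._shared_rooms: Dict[str, Set[str]] = {}  # user_id -> rooms we share

    def get_cached_devices(self, user_id: str) -> Optional[List[str]]:
        # If we no longer share any room with the user, we stopped receiving
        # their device list updates, so anything cached may be stale. Treat it
        # as a cache miss (the caller falls back to a federation query).
        if not self._shared_rooms.get(user_id):
            self._devices.pop(user_id, None)
            return None
        return self._devices.get(user_id)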
5 changed files with 51 additions and 15 deletions

changelog.d/13749.bugfix (new file)

@@ -0,0 +1 @@
+Fix a long standing bug where device lists would remain cached when remote users left and rejoined the last room shared with the local homeserver.


@@ -45,7 +45,6 @@ from synapse.types import (
     JsonDict,
     StreamKeyType,
     StreamToken,
-    UserID,
     get_domain_from_id,
     get_verify_key_from_cross_signing_key,
 )
@@ -324,8 +323,6 @@ class DeviceHandler(DeviceWorkerHandler):
             self.device_list_updater.incoming_device_list_update,
         )
 
-        hs.get_distributor().observe("user_left_room", self.user_left_room)
-
         # Whether `_handle_new_device_update_async` is currently processing.
         self._handle_new_device_update_is_processing = False
 
@@ -569,14 +566,6 @@ class DeviceHandler(DeviceWorkerHandler):
             StreamKeyType.DEVICE_LIST, position, users=[from_user_id]
         )
 
-    async def user_left_room(self, user: UserID, room_id: str) -> None:
-        user_id = user.to_string()
-        room_ids = await self.store.get_rooms_for_user(user_id)
-        if not room_ids:
-            # We no longer share rooms with this user, so we'll no longer
-            # receive device updates. Mark this in DB.
-            await self.store.mark_remote_user_device_list_as_unsubscribed(user_id)
-
     async def store_dehydrated_device(
         self,
         user_id: str,


@@ -175,6 +175,32 @@ class E2eKeysHandler:
                 user_ids_not_in_cache,
                 remote_results,
             ) = await self.store.get_user_devices_from_cache(query_list)
+
+            # Check that the homeserver still shares a room with all cached users.
+            # Note that this check may be slightly racy when a remote user leaves a
+            # room after we have fetched their cached device list. In the worst case
+            # we will do extra federation queries for devices that we had cached.
+            cached_users = set(remote_results.keys())
+            valid_cached_users = (
+                await self.store.get_users_server_still_shares_room_with(
+                    remote_results.keys()
+                )
+            )
+            invalid_cached_users = cached_users - valid_cached_users
+            if invalid_cached_users:
+                # Fix up results. If we get here, there is either a bug in device
+                # list tracking, or we hit the race mentioned above.
+                user_ids_not_in_cache.update(invalid_cached_users)
+                for invalid_user_id in invalid_cached_users:
+                    remote_results.pop(invalid_user_id)
+                # This log message may be removed if it turns out it's almost
+                # entirely triggered by races.
+                logger.error(
+                    "Devices for %s were cached, but the server no longer shares "
+                    "any rooms with them. The cached device lists are stale.",
+                    invalid_cached_users,
+                )
+
             for user_id, devices in remote_results.items():
                 user_devices = results.setdefault(user_id, {})
                 for device_id, device in devices.items():

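Stated on its own, the check added to E2eKeysHandler above does the following: take the users whose device lists came from the cache, ask the store which of them the server still shares a room with, and treat everyone else as a cache miss so they are re-queried over federation. A minimal standalone sketch of that filtering step (the `store` argument is a stand-in for the Synapse datastore; only `get_users_server_still_shares_room_with` is assumed, as used in the diff):

from typing import Dict, Set


async def drop_stale_cached_device_lists(
    store,  # assumed to provide get_users_server_still_shares_room_with()
    remote_results: Dict[str, Dict[str, dict]],  # user_id -> device_id -> device
    user_ids_not_in_cache: Set[str],
) -> None:
    """Remove cached device lists for users we no longer share a room with.

    Users dropped from `remote_results` are added to `user_ids_not_in_cache`
    so the caller falls back to a federation query for them.
    """
    cached_users = set(remote_results)
    valid_cached_users = await store.get_users_server_still_shares_room_with(
        cached_users
    )
    for user_id in cached_users - valid_cached_users:
        # Either device list tracking has a bug or the user left a shared room
        # between the cache read and this check; in both cases the cached
        # entry cannot be trusted.
        remote_results.pop(user_id)
        user_ids_not_in_cache.add(user_id)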

@@ -598,9 +598,9 @@ class EventsPersistenceStorageController:
             # room
             state_delta_for_room: Dict[str, DeltaState] = {}
 
-            # Set of remote users which were in rooms the server has left. We
-            # should check if we still share any rooms and if not we mark their
-            # device lists as stale.
+            # Set of remote users which were in rooms the server has left or who may
+            # have left rooms the server is in. We should check if we still share any
+            # rooms and if not we mark their device lists as stale.
             potentially_left_users: Set[str] = set()
 
             if not backfilled:
@@ -725,6 +725,20 @@ class EventsPersistenceStorageController:
                             current_state = {}
                             delta.no_longer_in_room = True
 
+                        # Add all remote users that might have left rooms.
+                        potentially_left_users.update(
+                            user_id
+                            for event_type, user_id in delta.to_delete
+                            if event_type == EventTypes.Member
+                            and not self.is_mine_id(user_id)
+                        )
+                        potentially_left_users.update(
+                            user_id
+                            for event_type, user_id in delta.to_insert.keys()
+                            if event_type == EventTypes.Member
+                            and not self.is_mine_id(user_id)
+                        )
+
                         state_delta_for_room[room_id] = delta
 
             await self.persist_events_store._persist_events_and_state_updates(

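The two `potentially_left_users.update(...)` calls above pull in every remote user whose `m.room.member` state was touched by the delta, whether the membership event was removed from or inserted into the room's current state. A simplified standalone version of that collection step (the delta shape mirrors the `to_delete`/`to_insert` fields used in the diff; `is_mine_id` stands in for the homeserver's own-user check):

from typing import Callable, Dict, Iterable, Set, Tuple

# (event_type, state_key) pairs, matching how the state delta is keyed.
StateKey = Tuple[str, str]

MEMBER_EVENT_TYPE = "m.room.member"  # EventTypes.Member in Synapse


def collect_potentially_left_users(
    to_delete: Iterable[StateKey],
    to_insert: Dict[StateKey, str],
    is_mine_id: Callable[[str], bool],
) -> Set[str]:
    """Return remote users whose room membership changed in this state delta.

    Each of them may have just left the last room we share, so the caller
    should re-check shared rooms and mark the device list as stale when none
    remain.
    """
    changed: Set[str] = set()
    for event_type, state_key in list(to_delete) + list(to_insert.keys()):
        # For m.room.member events the state_key is the affected user's ID.
        if event_type == MEMBER_EVENT_TYPE and not is_mine_id(state_key):
            changed.add(state_key)
    return changed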

@@ -891,6 +891,12 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
             new_callable=mock.MagicMock,
             return_value=make_awaitable(["some_room_id"]),
         )
+        mock_get_users = mock.patch.object(
+            self.store,
+            "get_users_server_still_shares_room_with",
+            new_callable=mock.MagicMock,
+            return_value=make_awaitable({remote_user_id}),
+        )
         mock_request = mock.patch.object(
             self.hs.get_federation_client(),
             "query_user_devices",
@@ -898,7 +904,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
             return_value=make_awaitable(response_body),
         )
 
-        with mock_get_rooms, mock_request as mocked_federation_request:
+        with mock_get_rooms, mock_get_users, mock_request as mocked_federation_request:
             # Make the first query and sanity check it succeeds.
             response_1 = self.get_success(
                 e2e_handler.query_devices(
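The new `mock_get_users` patch above follows the same pattern as the existing mocks: replace an async store method with a `MagicMock` whose `return_value` is an already-completed awaitable, so the handler under test believes the homeserver still shares a room with `remote_user_id`. A self-contained sketch of that pattern (`FakeStore` and the user ID are made up; `make_awaitable` is re-implemented here along the lines of Synapse's test helper of the same name):

import asyncio
from unittest import mock


class FakeStore:
    """Stand-in for the Synapse datastore used by the test above."""

    async def get_users_server_still_shares_room_with(self, user_ids):
        raise NotImplementedError("patched out in the test")


def make_awaitable(result):
    # A completed Future can be awaited to obtain `result`, which lets a
    # plain MagicMock stand in for an async method.
    future = asyncio.get_running_loop().create_future()
    future.set_result(result)
    return future


async def main() -> None:
    store = FakeStore()
    remote_user_id = "@remote:example.org"  # hypothetical user ID
    with mock.patch.object(
        store,
        "get_users_server_still_shares_room_with",
        new_callable=mock.MagicMock,
        return_value=make_awaitable({remote_user_id}),
    ):
        still_shared = await store.get_users_server_still_shares_room_with(
            {remote_user_id}
        )
        assert still_shared == {remote_user_id}


asyncio.run(main())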