Fix case where get_partial_current_state_deltas could return >100 rows (#18960)

This commit is contained in:
Andrew Morgan
2025-11-26 17:17:04 +00:00
committed by GitHub
parent c928347779
commit 703464c1f7
4 changed files with 378 additions and 32 deletions

View File

@@ -19,6 +19,7 @@
#
#
import json
import logging
from typing import cast
@@ -33,6 +34,7 @@ from synapse.server import HomeServer
from synapse.types import JsonDict, RoomID, StateMap, UserID
from synapse.types.state import StateFilter
from synapse.util.clock import Clock
from synapse.util.stringutils import random_string
from tests.unittest import HomeserverTestCase
@@ -643,3 +645,315 @@ class StateStoreTestCase(HomeserverTestCase):
),
)
self.assertEqual(context.state_group_before_event, groups[0][0])
class CurrentStateDeltaStreamTestCase(HomeserverTestCase):
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
super().prepare(reactor, clock, hs)
self.store = hs.get_datastores().main
self.storage = hs.get_storage_controllers()
self.state_datastore = self.storage.state.stores.state
self.event_creation_handler = hs.get_event_creation_handler()
self.event_builder_factory = hs.get_event_builder_factory()
# Create a made-up room and a user.
self.alice_user_id = UserID.from_string("@alice:test")
self.room = RoomID.from_string("!abc1234:test")
self.get_success(
self.store.store_room(
self.room.to_string(),
room_creator_user_id="@creator:text",
is_public=True,
room_version=RoomVersions.V1,
)
)
def inject_state_event(
self, room: RoomID, sender: UserID, typ: str, state_key: str, content: JsonDict
) -> EventBase:
builder = self.event_builder_factory.for_room_version(
RoomVersions.V1,
{
"type": typ,
"sender": sender.to_string(),
"state_key": state_key,
"room_id": room.to_string(),
"content": content,
},
)
event, unpersisted_context = self.get_success(
self.event_creation_handler.create_new_client_event(builder)
)
context = self.get_success(unpersisted_context.persist(event))
assert self.storage.persistence is not None
self.get_success(self.storage.persistence.persist_event(event, context))
return event
def test_get_partial_current_state_deltas_limit(self) -> None:
"""
Tests that `get_partial_current_state_deltas` actually returns `limit` rows.
Regression test for https://github.com/element-hq/synapse/pull/18960.
"""
# Inject a create event which other events can auth with.
self.inject_state_event(
self.room, self.alice_user_id, EventTypes.Create, "", {}
)
limit = 2
# Make N*2 state changes in the room, resulting in 2N+1 total state
# events (including the create event) in the room.
for i in range(limit * 2):
self.inject_state_event(
self.room,
self.alice_user_id,
EventTypes.Name,
"",
{"name": f"rename #{i}"},
)
# Call the function under test. This must return <= `limit` rows.
max_stream_id = self.store.get_room_max_stream_ordering()
clipped_stream_id, deltas = self.get_success(
self.store.get_partial_current_state_deltas(
prev_stream_id=0,
max_stream_id=max_stream_id,
limit=limit,
)
)
self.assertLessEqual(
len(deltas), limit, f"Returned {len(deltas)} rows, expected at most {limit}"
)
# Advancing from the clipped point should eventually drain the remainder.
# Make sure we make progress and dont get stuck.
if deltas:
next_prev = clipped_stream_id
next_clipped, next_deltas = self.get_success(
self.store.get_partial_current_state_deltas(
prev_stream_id=next_prev, max_stream_id=max_stream_id, limit=limit
)
)
self.assertNotEqual(
next_clipped, clipped_stream_id, "Did not advance clipped_stream_id"
)
# Still should respect the limit.
self.assertLessEqual(len(next_deltas), limit)
def test_non_unique_stream_ids_in_current_state_delta_stream(self) -> None:
"""
Tests that `get_partial_current_state_deltas` always returns entire
groups of state deltas (grouped by `stream_id`), and never part of one.
We check by passing a `limit` that to the function that, if followed
blindly, would split a group of state deltas that share a `stream_id`.
The test passes if that group is not returned at all (because doing so
would overshoot the limit of returned state deltas).
Regression test for https://github.com/element-hq/synapse/pull/18960.
"""
# Inject a create event to start with.
self.inject_state_event(
self.room, self.alice_user_id, EventTypes.Create, "", {}
)
# Then inject one "real" m.room.name event. This will give us a stream_id that
# we can create some more (fake) events with.
self.inject_state_event(
self.room,
self.alice_user_id,
EventTypes.Name,
"",
{"name": "rename #1"},
)
# Get the stream_id of the last-inserted event.
max_stream_id = self.store.get_room_max_stream_ordering()
# Make 3 more state changes in the room, resulting in 5 total state
# events (including the create event, and the first name update) in
# the room.
#
# All of these state deltas have the same `stream_id` as the original name event.
# Do so by editing the table directly as that's the simplest way to have
# all share the same `stream_id`.
self.get_success(
self.store.db_pool.simple_insert_many(
"current_state_delta_stream",
keys=(
"stream_id",
"room_id",
"type",
"state_key",
"event_id",
"prev_event_id",
"instance_name",
),
values=[
(
max_stream_id,
self.room.to_string(),
EventTypes.Name,
"",
f"${random_string(5)}:test",
json.dumps({"name": f"rename #{i}"}),
"master",
)
for i in range(3)
],
desc="inject_room_name_state_events",
)
)
# Call the function under test with a limit of 4. Without the limit, we
# would return 5 state deltas:
#
# C N N N N
# 1 2 3 4 5
#
# C = m.room.create
# N = m.room.name
#
# With the limit, we should return only the create event, as returning 4
# state deltas would result in splitting a group:
#
# 2 3 3 3 3 - state IDs/groups
# C N N N N
# 1 2 3 4 X
clipped_stream_id, deltas = self.get_success(
self.store.get_partial_current_state_deltas(
prev_stream_id=0,
max_stream_id=max_stream_id,
limit=4,
)
)
# 2 is the stream ID of the m.room.create event.
self.assertEqual(clipped_stream_id, 2)
self.assertEqual(
len(deltas),
1,
f"Returned {len(deltas)} rows, expected only one (the create event): {deltas}",
)
# Advance once more with our limit of 4. We should now get all 4
# `m.room.name` state deltas as they can fit under the limit.
clipped_stream_id, next_deltas = self.get_success(
self.store.get_partial_current_state_deltas(
prev_stream_id=clipped_stream_id, max_stream_id=max_stream_id, limit=4
)
)
self.assertEqual(
clipped_stream_id, 3
) # The stream ID of the 4 m.room.name events.
self.assertEqual(
len(next_deltas),
4,
f"Returned {len(next_deltas)} rows, expected all 4 m.room.name events: {next_deltas}",
)
def test_get_partial_current_state_deltas_does_not_enter_infinite_loop(
self,
) -> None:
"""
Tests that `get_partial_current_state_deltas` does not repeatedly return
zero entries due to the passed `limit` parameter being less than the
size of the next group of state deltas from the given `prev_stream_id`.
"""
# Inject a create event to start with.
self.inject_state_event(
self.room, self.alice_user_id, EventTypes.Create, "", {}
)
# Then inject one "real" m.room.name event. This will give us a stream_id that
# we can create some more (fake) events with.
self.inject_state_event(
self.room,
self.alice_user_id,
EventTypes.Name,
"",
{"name": "rename #1"},
)
# Get the stream_id of the last-inserted event.
max_stream_id = self.store.get_room_max_stream_ordering()
# Make 3 more state changes in the room, resulting in 5 total state
# events (including the create event, and the first name update) in
# the room.
#
# All of these state deltas have the same `stream_id` as the original name event.
# Do so by editing the table directly as that's the simplest way to have
# all share the same `stream_id`.
self.get_success(
self.store.db_pool.simple_insert_many(
"current_state_delta_stream",
keys=(
"stream_id",
"room_id",
"type",
"state_key",
"event_id",
"prev_event_id",
"instance_name",
),
values=[
(
max_stream_id,
self.room.to_string(),
EventTypes.Name,
"",
f"${random_string(5)}:test",
json.dumps({"name": f"rename #{i}"}),
"master",
)
for i in range(3)
],
desc="inject_room_name_state_events",
)
)
# Call the function under test with a limit of 4. Without the limit, we would return
# 5 state deltas:
#
# C N N N N
# 1 2 3 4 5
#
# C = m.room.create
# N = m.room.name
#
# With the limit, we should return only the create event, as returning 4
# state deltas would result in splitting a group:
#
# 2 3 3 3 3 - state IDs/groups
# C N N N N
# 1 2 3 4 X
clipped_stream_id, deltas = self.get_success(
self.store.get_partial_current_state_deltas(
prev_stream_id=2, # Start after the create event (which has stream_id 2).
max_stream_id=max_stream_id,
limit=2, # Less than the size of the next group (which is 4).
)
)
self.assertEqual(
clipped_stream_id, 3
) # The stream ID of the 4 m.room.name events.
# We should get all 4 `m.room.name` state deltas, instead of 0, which
# would result in the caller entering an infinite loop.
self.assertEqual(
len(deltas),
4,
f"Returned {len(deltas)} rows, expected 4 even though it broke our limit: {deltas}",
)