
Compare commits


6 Commits

Author  SHA1  Message  Date

Mathieu Velten  cec52ea9b0  Add changelog  2023-06-14 00:50:53 +02:00

Mathieu Velten  0cb8502bbb  Use parse_duration for newly introduced options  2023-06-13 23:55:15 +02:00

Shay  553f2f53e7  Replace EventContext fields prev_group and delta_ids with field state_group_deltas (#15233)  2023-06-13 13:22:06 -07:00

Mathieu Velten  59ec4a0dc1  Fix MSC3983 support: only one OTK per device was returned through federation (#15770)  2023-06-13 19:51:47 +02:00

Eric Eastwood  0757d59ec4  Avoid backfill when we already have messages to return (#15737)  2023-06-13 12:31:08 -05:00

    We now only block the client on backfill when we see a large gap in the events (more than 2 events missing in a row according to `depth`), more than 3 single-event holes, or not enough messages to fill the response. Otherwise, we return the messages directly to the client and backfill in the background for eventual consistency's sake.

    Fix https://github.com/matrix-org/synapse/issues/15696

Patrick Cloke  df945e0d7c  Fix MSC3983 support: Use the unstable /keys/claim federation endpoint if multiple keys are requested (#15755)  2023-06-13 18:07:55 +02:00
16 changed files with 280 additions and 107 deletions

changelog.d/15233.misc Normal file

@@ -0,0 +1 @@
+Replace `EventContext` fields `prev_group` and `delta_ids` with field `state_group_deltas`.

changelog.d/15737.misc Normal file

@@ -0,0 +1 @@
+Improve `/messages` response time by avoiding backfill when we already have messages to return.

changelog.d/15755.misc Normal file

@@ -0,0 +1 @@
+Fix requesting multiple keys at once over federation, related to [MSC3983](https://github.com/matrix-org/matrix-spec-proposals/pull/3983).

changelog.d/15770.bugfix Normal file

@@ -0,0 +1 @@
+Fix requesting multiple keys at once over federation, related to [MSC3983](https://github.com/matrix-org/matrix-spec-proposals/pull/3983).

changelog.d/15778.misc Normal file

@@ -0,0 +1 @@
+Use parse_duration for federation client timeout and retry options.

synapse/config/federation.py

@@ -53,9 +53,15 @@ class FederationConfig(Config):
         # Allow for the configuration of timeout, max request retries
         # and min/max retry delays in the matrix federation client.
-        self.client_timeout = federation_config.get("client_timeout", 60)
-        self.max_long_retry_delay = federation_config.get("max_long_retry_delay", 60)
-        self.max_short_retry_delay = federation_config.get("max_short_retry_delay", 2)
+        self.client_timeout = Config.parse_duration(
+            federation_config.get("client_timeout", "60s")
+        )
+        self.max_long_retry_delay = Config.parse_duration(
+            federation_config.get("max_long_retry_delay", "60s")
+        )
+        self.max_short_retry_delay = Config.parse_duration(
+            federation_config.get("max_short_retry_delay", "2s")
+        )
         self.max_long_retries = federation_config.get("max_long_retries", 10)
         self.max_short_retries = federation_config.get("max_short_retries", 3)
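For reference, `Config.parse_duration` accepts either a bare integer (treated as milliseconds) or a string with a unit suffix, and returns milliseconds, which is why the defaults can move from `60` to `"60s"` without changing behavior. A minimal sketch of that contract (not Synapse's actual implementation; the unit table is an assumption):

import re

# Assumed unit suffixes mapped to milliseconds.
_UNITS_MS = {
    "s": 1000,
    "m": 60 * 1000,
    "h": 60 * 60 * 1000,
    "d": 24 * 60 * 60 * 1000,
    "w": 7 * 24 * 60 * 60 * 1000,
    "y": 365 * 24 * 60 * 60 * 1000,
}

def parse_duration(value):
    # Bare integers are already a number of milliseconds.
    if isinstance(value, int):
        return value
    match = re.fullmatch(r"(\d+)([smhdwy]?)", value.strip())
    if not match:
        raise ValueError(f"invalid duration: {value!r}")
    number, unit = match.groups()
    return int(number) * (_UNITS_MS[unit] if unit else 1)

assert parse_duration("60s") == 60_000  # new client_timeout default
assert parse_duration("2s") == 2_000    # new max_short_retry_delay default
assert parse_duration(60) == 60         # a bare int is left as milliseconds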

synapse/events/snapshot.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, List, Optional, Tuple
+from typing import TYPE_CHECKING, Dict, List, Optional, Tuple

 import attr
 from immutabledict import immutabledict
@@ -107,33 +107,32 @@ class EventContext(UnpersistedEventContextBase):
         state_delta_due_to_event: If `state_group` and `state_group_before_event` are not None
             then this is the delta of the state between the two groups.

-        prev_group: If it is known, ``state_group``'s prev_group. Note that this being
-            None does not necessarily mean that ``state_group`` does not have
-            a prev_group!
+        state_group_deltas: If not empty, this is a dict collecting a mapping of the state
+            difference between state groups.

-            If the event is a state event, this is normally the same as
-            ``state_group_before_event``.
+            The keys are a tuple of two integers: the initial group and final state group.
+            The corresponding value is a state map representing the state delta between
+            these state groups.

-            If ``state_group`` is None (ie, the event is an outlier), ``prev_group``
-            will always also be ``None``.
+            The dictionary is expected to have at most two entries with state groups of:

-            Note that this *not* (necessarily) the state group associated with
-            ``_prev_state_ids``.
+            1. The state group before the event and after the event.
+            2. The state group preceding the state group before the event and the
+               state group before the event.

-        delta_ids: If ``prev_group`` is not None, the state delta between ``prev_group``
-            and ``state_group``.
+            This information is collected and stored as part of an optimization for persisting
+            events.

         partial_state: if True, we may be storing this event with a temporary,
             incomplete state.
     """

     _storage: "StorageControllers"
+    state_group_deltas: Dict[Tuple[int, int], StateMap[str]]
     rejected: Optional[str] = None
     _state_group: Optional[int] = None
     state_group_before_event: Optional[int] = None
     _state_delta_due_to_event: Optional[StateMap[str]] = None
-    prev_group: Optional[int] = None
-    delta_ids: Optional[StateMap[str]] = None
     app_service: Optional[ApplicationService] = None
     partial_state: bool = False
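To make the documented shape concrete, here is a hypothetical `state_group_deltas` value with both permitted entries (state group numbers and event IDs invented for illustration):

# Suppose state group 2 precedes the state group before the event (3), and the
# event itself takes the room from state group 3 to state group 4.
state_group_deltas = {
    # (prev_group, state_group_before_event) -> delta between those two groups
    (2, 3): {("m.room.topic", ""): "$topic_event_id"},
    # (state_group_before_event, state_group_after_event) -> delta due to the event
    (3, 4): {("m.room.member", "@alice:example.com"): "$join_event_id"},
}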
@@ -145,16 +144,14 @@ class EventContext(UnpersistedEventContextBase):
         state_group_before_event: Optional[int],
         state_delta_due_to_event: Optional[StateMap[str]],
         partial_state: bool,
-        prev_group: Optional[int] = None,
-        delta_ids: Optional[StateMap[str]] = None,
+        state_group_deltas: Dict[Tuple[int, int], StateMap[str]],
     ) -> "EventContext":
         return EventContext(
             storage=storage,
             state_group=state_group,
             state_group_before_event=state_group_before_event,
             state_delta_due_to_event=state_delta_due_to_event,
-            prev_group=prev_group,
-            delta_ids=delta_ids,
+            state_group_deltas=state_group_deltas,
             partial_state=partial_state,
         )
@@ -163,7 +160,7 @@ class EventContext(UnpersistedEventContextBase):
         storage: "StorageControllers",
     ) -> "EventContext":
         """Return an EventContext instance suitable for persisting an outlier event"""
-        return EventContext(storage=storage)
+        return EventContext(storage=storage, state_group_deltas={})

     async def persist(self, event: EventBase) -> "EventContext":
         return self
@@ -183,13 +180,15 @@ class EventContext(UnpersistedEventContextBase):
             "state_group": self._state_group,
             "state_group_before_event": self.state_group_before_event,
             "rejected": self.rejected,
-            "prev_group": self.prev_group,
+            "state_group_deltas": _encode_state_group_delta(self.state_group_deltas),
             "state_delta_due_to_event": _encode_state_dict(
                 self._state_delta_due_to_event
             ),
-            "delta_ids": _encode_state_dict(self.delta_ids),
             "app_service_id": self.app_service.id if self.app_service else None,
             "partial_state": self.partial_state,
+            # add dummy delta_ids and prev_group for backwards compatibility
+            "delta_ids": None,
+            "prev_group": None,
         }

     @staticmethod
@@ -204,17 +203,24 @@ class EventContext(UnpersistedEventContextBase):
         Returns:
             The event context.
         """
+        # workaround for backwards/forwards compatibility: if the input doesn't have a value
+        # for "state_group_deltas" just assign an empty dict
+        state_group_deltas = input.get("state_group_deltas", None)
+        if state_group_deltas:
+            state_group_deltas = _decode_state_group_delta(state_group_deltas)
+        else:
+            state_group_deltas = {}
+
         context = EventContext(
             # We use the state_group and prev_state_id stuff to pull the
             # current_state_ids out of the DB and construct prev_state_ids.
             storage=storage,
             state_group=input["state_group"],
             state_group_before_event=input["state_group_before_event"],
-            prev_group=input["prev_group"],
+            state_group_deltas=state_group_deltas,
             state_delta_due_to_event=_decode_state_dict(
                 input["state_delta_due_to_event"]
             ),
-            delta_ids=_decode_state_dict(input["delta_ids"]),
             rejected=input["rejected"],
             partial_state=input.get("partial_state", False),
         )
@@ -349,7 +355,7 @@ class UnpersistedEventContext(UnpersistedEventContextBase):
     _storage: "StorageControllers"
     state_group_before_event: Optional[int]
     state_group_after_event: Optional[int]
-    state_delta_due_to_event: Optional[dict]
+    state_delta_due_to_event: Optional[StateMap[str]]
     prev_group_for_state_group_before_event: Optional[int]
     delta_ids_to_state_group_before_event: Optional[StateMap[str]]
     partial_state: bool
@@ -380,26 +386,16 @@ class UnpersistedEventContext(UnpersistedEventContextBase):
         events_and_persisted_context = []
         for event, unpersisted_context in amended_events_and_context:
-            if event.is_state():
-                context = EventContext(
-                    storage=unpersisted_context._storage,
-                    state_group=unpersisted_context.state_group_after_event,
-                    state_group_before_event=unpersisted_context.state_group_before_event,
-                    state_delta_due_to_event=unpersisted_context.state_delta_due_to_event,
-                    partial_state=unpersisted_context.partial_state,
-                    prev_group=unpersisted_context.state_group_before_event,
-                    delta_ids=unpersisted_context.state_delta_due_to_event,
-                )
-            else:
-                context = EventContext(
-                    storage=unpersisted_context._storage,
-                    state_group=unpersisted_context.state_group_after_event,
-                    state_group_before_event=unpersisted_context.state_group_before_event,
-                    state_delta_due_to_event=unpersisted_context.state_delta_due_to_event,
-                    partial_state=unpersisted_context.partial_state,
-                    prev_group=unpersisted_context.prev_group_for_state_group_before_event,
-                    delta_ids=unpersisted_context.delta_ids_to_state_group_before_event,
-                )
+            state_group_deltas = unpersisted_context._build_state_group_deltas()
+
+            context = EventContext(
+                storage=unpersisted_context._storage,
+                state_group=unpersisted_context.state_group_after_event,
+                state_group_before_event=unpersisted_context.state_group_before_event,
+                state_delta_due_to_event=unpersisted_context.state_delta_due_to_event,
+                partial_state=unpersisted_context.partial_state,
+                state_group_deltas=state_group_deltas,
+            )
             events_and_persisted_context.append((event, context))
         return events_and_persisted_context
@@ -452,11 +448,11 @@ class UnpersistedEventContext(UnpersistedEventContextBase):
         # if the event isn't a state event the state group doesn't change
         if not self.state_delta_due_to_event:
-            state_group_after_event = self.state_group_before_event
+            self.state_group_after_event = self.state_group_before_event

         # otherwise if it is a state event we need to get a state group for it
         else:
-            state_group_after_event = await self._storage.state.store_state_group(
+            self.state_group_after_event = await self._storage.state.store_state_group(
                 event.event_id,
                 event.room_id,
                 prev_group=self.state_group_before_event,
@@ -464,16 +460,81 @@ class UnpersistedEventContext(UnpersistedEventContextBase):
                 current_state_ids=None,
             )

+        state_group_deltas = self._build_state_group_deltas()
+
         return EventContext.with_state(
             storage=self._storage,
-            state_group=state_group_after_event,
+            state_group=self.state_group_after_event,
             state_group_before_event=self.state_group_before_event,
             state_delta_due_to_event=self.state_delta_due_to_event,
+            state_group_deltas=state_group_deltas,
             partial_state=self.partial_state,
-            prev_group=self.state_group_before_event,
-            delta_ids=self.state_delta_due_to_event,
         )

+    def _build_state_group_deltas(self) -> Dict[Tuple[int, int], StateMap]:
+        """
+        Collect deltas between the state groups associated with this context
+        """
+        state_group_deltas = {}
+
+        # if we know the state group before the event and after the event, add them and the
+        # state delta between them to state_group_deltas
+        if self.state_group_before_event and self.state_group_after_event:
+            # if we have the state groups we should have the delta
+            assert self.state_delta_due_to_event is not None
+            state_group_deltas[
+                (
+                    self.state_group_before_event,
+                    self.state_group_after_event,
+                )
+            ] = self.state_delta_due_to_event
+
+        # the state group before the event may also have a state group which precedes it, if
+        # we have that and the state group before the event, add them and the state
+        # delta between them to state_group_deltas
+        if (
+            self.prev_group_for_state_group_before_event
+            and self.state_group_before_event
+        ):
+            # if we have both state groups we should have the delta between them
+            assert self.delta_ids_to_state_group_before_event is not None
+            state_group_deltas[
+                (
+                    self.prev_group_for_state_group_before_event,
+                    self.state_group_before_event,
+                )
+            ] = self.delta_ids_to_state_group_before_event
+
+        return state_group_deltas
+
+
+def _encode_state_group_delta(
+    state_group_delta: Dict[Tuple[int, int], StateMap[str]]
+) -> List[Tuple[int, int, Optional[List[Tuple[str, str, str]]]]]:
+    if not state_group_delta:
+        return []
+
+    state_group_delta_encoded = []
+    for key, value in state_group_delta.items():
+        state_group_delta_encoded.append((key[0], key[1], _encode_state_dict(value)))
+
+    return state_group_delta_encoded
+
+
+def _decode_state_group_delta(
+    input: List[Tuple[int, int, List[Tuple[str, str, str]]]]
+) -> Dict[Tuple[int, int], StateMap[str]]:
+    if not input:
+        return {}
+
+    state_group_deltas = {}
+    for state_group_1, state_group_2, state_dict in input:
+        state_map = _decode_state_dict(state_dict)
+        assert state_map is not None
+        state_group_deltas[(state_group_1, state_group_2)] = state_map
+
+    return state_group_deltas
+
+
 def _encode_state_dict(
     state_dict: Optional[StateMap[str]],
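As a sanity check on the two helpers above, a round-trip sketch (event ID invented; note that `_encode_state_dict` flattens a state map into `(type, state_key, event_id)` triples):

deltas = {(2, 3): {("m.room.topic", ""): "$topic_event_id"}}

encoded = _encode_state_group_delta(deltas)
# encoded == [(2, 3, [("m.room.topic", "", "$topic_event_id")])] -- JSON-serializable

decoded = _decode_state_group_delta(encoded)
assert decoded == deltas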

synapse/federation/federation_client.py

@@ -260,7 +260,9 @@ class FederationClient(FederationBase):
         use_unstable = False
         for user_id, one_time_keys in query.items():
             for device_id, algorithms in one_time_keys.items():
-                if any(count > 1 for count in algorithms.values()):
+                # If more than one algorithm is requested, attempt to use the unstable
+                # endpoint.
+                if sum(algorithms.values()) > 1:
                     use_unstable = True
                 if algorithms:
                     # For the stable query, choose only the first algorithm.
@@ -296,6 +298,7 @@ class FederationClient(FederationBase):
         else:
             logger.debug("Skipping unstable claim client keys API")

+        # TODO Potentially attempt multiple queries and combine the results?
         return await self.transport_layer.claim_client_keys(
             user, destination, content, timeout
         )
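The condition change matters when a device requests one key each of two different algorithms: no single count exceeds one, so the old check stayed on the stable endpoint and only one key came back. A small illustration (the second algorithm name is invented):

algorithms = {"signed_curve25519": 1, "another_algorithm": 1}

# Old check: never fires for this request.
assert not any(count > 1 for count in algorithms.values())

# New check: two keys are requested in total, so the unstable endpoint is tried.
assert sum(algorithms.values()) > 1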

synapse/federation/federation_server.py

@@ -1016,7 +1016,9 @@ class FederationServer(FederationBase):
         for user_id, device_keys in result.items():
             for device_id, keys in device_keys.items():
                 for key_id, key in keys.items():
-                    json_result.setdefault(user_id, {})[device_id] = {key_id: key}
+                    json_result.setdefault(user_id, {}).setdefault(device_id, {})[
+                        key_id
+                    ] = key

         logger.info(
             "Claimed one-time-keys: %s",

synapse/handlers/pagination.py

@@ -40,6 +40,11 @@ if TYPE_CHECKING:

 logger = logging.getLogger(__name__)

+# How many single event gaps we tolerate returning in a `/messages` response before we
+# backfill and try to fill in the history. This is an arbitrarily picked number so feel
+# free to tune it in the future.
+BACKFILL_BECAUSE_TOO_MANY_GAPS_THRESHOLD = 3
+

 @attr.s(slots=True, auto_attribs=True)
 class PurgeStatus:
@@ -486,35 +491,35 @@ class PaginationHandler:
                     room_id, room_token.stream
                 )

-            if not use_admin_priviledge and membership == Membership.LEAVE:
-                # If they have left the room then clamp the token to be before
-                # they left the room, to save the effort of loading from the
-                # database.
+        # If they have left the room then clamp the token to be before
+        # they left the room, to save the effort of loading from the
+        # database.
+        if (
+            pagin_config.direction == Direction.BACKWARDS
+            and not use_admin_priviledge
+            and membership == Membership.LEAVE
+        ):
+            # This is only None if the room is world_readable, in which case
+            # "Membership.JOIN" would have been returned and we should never hit
+            # this branch.
+            assert member_event_id

-                # This is only None if the room is world_readable, in which
-                # case "JOIN" would have been returned.
-                assert member_event_id
+            leave_token = await self.store.get_topological_token_for_event(
+                member_event_id
+            )
+            assert leave_token.topological is not None

-                leave_token = await self.store.get_topological_token_for_event(
-                    member_event_id
-                )
-                assert leave_token.topological is not None
-
-                if leave_token.topological < curr_topo:
-                    from_token = from_token.copy_and_replace(
-                        StreamKeyType.ROOM, leave_token
-                    )
-
-            await self.hs.get_federation_handler().maybe_backfill(
-                room_id,
-                curr_topo,
-                limit=pagin_config.limit,
-            )
+            if leave_token.topological < curr_topo:
+                from_token = from_token.copy_and_replace(
+                    StreamKeyType.ROOM, leave_token
+                )

         to_room_key = None
         if pagin_config.to_token:
             to_room_key = pagin_config.to_token.room_key

+        # Initially fetch the events from the database. With any luck, we can return
+        # these without blocking on backfill (handled below).
         events, next_key = await self.store.paginate_room_events(
             room_id=room_id,
             from_key=from_token.room_key,
@@ -524,6 +529,94 @@ class PaginationHandler:
             event_filter=event_filter,
         )

+        if pagin_config.direction == Direction.BACKWARDS:
+            # We use a `Set` because there can be multiple events at a given depth
+            # and we only care about looking at the unique continuum of depths to
+            # find gaps.
+            event_depths: Set[int] = {event.depth for event in events}
+            sorted_event_depths = sorted(event_depths)
+
+            # Inspect the depths of the returned events to see if there are any gaps
+            found_big_gap = False
+            number_of_gaps = 0
+            previous_event_depth = (
+                sorted_event_depths[0] if len(sorted_event_depths) > 0 else 0
+            )
+            for event_depth in sorted_event_depths:
+                # We don't expect a negative depth but we'll just deal with it in
+                # any case by taking the absolute value to get the true gap between
+                # any two integers.
+                depth_gap = abs(event_depth - previous_event_depth)
+                # A `depth_gap` of 1 is a normal continuous chain to the next event
+                # (1 <-- 2 <-- 3) so anything larger indicates a missing event (it's
+                # also possible there is no event at a given depth but we can't ever
+                # know that for sure)
+                if depth_gap > 1:
+                    number_of_gaps += 1
+
+                # We only tolerate a small number of single-event long gaps in the
+                # returned events because those are most likely just events we've
+                # failed to pull in the past. Anything longer than that is probably
+                # a sign that we're missing a decent chunk of history and we should
+                # try to backfill it.
+                #
+                # XXX: It's possible we could tolerate longer gaps if we checked
+                # that a given event's `prev_events` is one that has failed pull
+                # attempts and we could just treat it like a dead branch of history
+                # for now or at least something that we don't need to block the
+                # client on to try pulling.
+                #
+                # XXX: If we had something like MSC3871 to indicate gaps in the
+                # timeline to the client, we could also get away with any sized gap
+                # and just have the client refetch the holes as they see fit.
+                if depth_gap > 2:
+                    found_big_gap = True
+                    break
+                previous_event_depth = event_depth
+
+            # Backfill in the foreground if we found a big gap, have too many holes,
+            # or we don't have enough events to fill the limit that the client asked
+            # for.
+            missing_too_many_events = (
+                number_of_gaps > BACKFILL_BECAUSE_TOO_MANY_GAPS_THRESHOLD
+            )
+            not_enough_events_to_fill_response = len(events) < pagin_config.limit
+            if (
+                found_big_gap
+                or missing_too_many_events
+                or not_enough_events_to_fill_response
+            ):
+                did_backfill = (
+                    await self.hs.get_federation_handler().maybe_backfill(
+                        room_id,
+                        curr_topo,
+                        limit=pagin_config.limit,
+                    )
+                )
+
+                # If we did backfill something, refetch the events from the database to
+                # catch anything new that might have been added since we last fetched.
+                if did_backfill:
+                    events, next_key = await self.store.paginate_room_events(
+                        room_id=room_id,
+                        from_key=from_token.room_key,
+                        to_key=to_room_key,
+                        direction=pagin_config.direction,
+                        limit=pagin_config.limit,
+                        event_filter=event_filter,
+                    )
+            else:
+                # Otherwise, we can backfill in the background for eventual
+                # consistency's sake but we don't need to block the client waiting
+                # for a costly federation call and processing.
+                run_as_background_process(
+                    "maybe_backfill_in_the_background",
+                    self.hs.get_federation_handler().maybe_backfill,
+                    room_id,
+                    curr_topo,
+                    limit=pagin_config.limit,
+                )
+
         next_token = from_token.copy_and_replace(StreamKeyType.ROOM, next_key)

         # if no events are returned from pagination, that implies
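The gap heuristic above can be exercised in isolation; a sketch over an invented set of depths, using the same thresholds as the diff:

BACKFILL_BECAUSE_TOO_MANY_GAPS_THRESHOLD = 3

# 2 -> 4 is a tolerable single-event hole; 5 -> 9 is a "big" gap.
sorted_event_depths = sorted({1, 2, 4, 5, 9})

found_big_gap = False
number_of_gaps = 0
previous_event_depth = sorted_event_depths[0]
for event_depth in sorted_event_depths:
    depth_gap = abs(event_depth - previous_event_depth)
    if depth_gap > 1:
        number_of_gaps += 1
    if depth_gap > 2:
        found_big_gap = True
        break
    previous_event_depth = event_depth

# The big gap forces a foreground backfill even though number_of_gaps is
# still under BACKFILL_BECAUSE_TOO_MANY_GAPS_THRESHOLD.
assert found_big_gap and number_of_gaps == 2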

synapse/http/matrixfederationclient.py

@@ -404,10 +404,10 @@ class MatrixFederationHttpClient:
         self.clock = hs.get_clock()
         self._store = hs.get_datastores().main
         self.version_string_bytes = hs.version_string.encode("ascii")
-        self.default_timeout = hs.config.federation.client_timeout
+        self.default_timeout = hs.config.federation.client_timeout / 1000

-        self.max_long_retry_delay = hs.config.federation.max_long_retry_delay
-        self.max_short_retry_delay = hs.config.federation.max_short_retry_delay
+        self.max_long_retry_delay = hs.config.federation.max_long_retry_delay / 1000
+        self.max_short_retry_delay = hs.config.federation.max_short_retry_delay / 1000
         self.max_long_retries = hs.config.federation.max_long_retries
         self.max_short_retries = hs.config.federation.max_short_retries
@@ -538,10 +538,10 @@ class MatrixFederationHttpClient:
             logger.exception(f"Invalid destination: {request.destination}.")
             raise FederationDeniedError(request.destination)

-        if timeout:
-            _sec_timeout = timeout / 1000
-        else:
-            _sec_timeout = self.default_timeout
+        if timeout is None:
+            timeout = int(self.default_timeout)
+
+        _sec_timeout = timeout / 1000

         if (
             self.hs.config.federation.federation_domain_whitelist is not None
@@ -946,10 +946,9 @@ class MatrixFederationHttpClient:
             timeout=timeout,
         )

-        if timeout is not None:
-            _sec_timeout = timeout / 1000
-        else:
-            _sec_timeout = self.default_timeout
+        if timeout is None:
+            timeout = int(self.default_timeout)
+        _sec_timeout = timeout / 1000

         if parser is None:
             parser = cast(ByteParser[T], JsonParser())
@@ -1135,10 +1134,9 @@ class MatrixFederationHttpClient:
             timeout=timeout,
         )

-        if timeout is not None:
-            _sec_timeout = timeout / 1000
-        else:
-            _sec_timeout = self.default_timeout
+        if timeout is None:
+            timeout = int(self.default_timeout)
+        _sec_timeout = timeout / 1000

         if parser is None:
             parser = cast(ByteParser[T], JsonParser())
@@ -1211,10 +1209,9 @@ class MatrixFederationHttpClient:
             ignore_backoff=ignore_backoff,
         )

-        if timeout is not None:
-            _sec_timeout = timeout / 1000
-        else:
-            _sec_timeout = self.default_timeout
+        if timeout is None:
+            timeout = int(self.default_timeout)
+        _sec_timeout = timeout / 1000

         body = await _handle_response(
             self.reactor, _sec_timeout, request, response, start_ms, parser=JsonParser()
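Each request helper now normalizes its timeout the same way; isolated, the pattern is (a sketch, assuming per-request timeouts and `default_timeout` are expressed in milliseconds at this point):

def normalize_timeout(timeout, default_timeout):
    # Fall back to the configured default, then convert to the seconds
    # expected further down the stack.
    if timeout is None:
        timeout = int(default_timeout)
    return timeout / 1000

assert normalize_timeout(30_000, 60_000) == 30.0  # explicit per-request timeout
assert normalize_timeout(None, 60_000) == 60.0    # falls back to the default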

synapse/storage/controllers/persist_events.py

@@ -839,9 +839,8 @@ class EventsPersistenceStorageController:
                         "group" % (ev.event_id,)
                     )
                     continue

-                if ctx.prev_group:
-                    state_group_deltas[(ctx.prev_group, ctx.state_group)] = ctx.delta_ids
+                if ctx.state_group_deltas:
+                    state_group_deltas.update(ctx.state_group_deltas)

         # We need to map the event_ids to their state groups. First, let's
         # check if the event is one we're persisting, in which case we can

tests/events/test_snapshot.py

@@ -101,8 +101,7 @@ class TestEventContext(unittest.HomeserverTestCase):
         self.assertEqual(
             context.state_group_before_event, d_context.state_group_before_event
         )
-        self.assertEqual(context.prev_group, d_context.prev_group)
-        self.assertEqual(context.delta_ids, d_context.delta_ids)
+        self.assertEqual(context.state_group_deltas, d_context.state_group_deltas)
         self.assertEqual(context.app_service, d_context.app_service)
         self.assertEqual(

tests/http/test_matrixfederationclient.py

@@ -644,9 +644,9 @@ class FederationClientTests(HomeserverTestCase):
     @override_config(
         {
             "federation": {
-                "client_timeout": 180,
-                "max_long_retry_delay": 100,
-                "max_short_retry_delay": 7,
+                "client_timeout": "180s",
+                "max_long_retry_delay": "100s",
+                "max_short_retry_delay": "7s",
                 "max_long_retries": 20,
                 "max_short_retries": 5,
             }

tests/storage/test_event_chain.py

@@ -401,7 +401,10 @@ class EventChainStoreTestCase(HomeserverTestCase):
         assert persist_events_store is not None
         persist_events_store._store_event_txn(
             txn,
-            [(e, EventContext(self.hs.get_storage_controllers())) for e in events],
+            [
+                (e, EventContext(self.hs.get_storage_controllers(), {}))
+                for e in events
+            ],
         )

         # Actually call the function that calculates the auth chain stuff.

tests/test_state.py

@@ -555,10 +555,15 @@ class StateTestCase(unittest.TestCase):
             (e.event_id for e in old_state + [event]), current_state_ids.values()
         )

         self.assertIsNotNone(context.state_group_before_event)
+        assert context.state_group_before_event is not None
+        assert context.state_group is not None
+        self.assertEqual(
+            context.state_group_deltas.get(
+                (context.state_group_before_event, context.state_group)
+            ),
+            {(event.type, event.state_key): event.event_id},
+        )
         self.assertNotEqual(context.state_group_before_event, context.state_group)
-        self.assertEqual(context.state_group_before_event, context.prev_group)
-        self.assertEqual({("state", ""): event.event_id}, context.delta_ids)

     @defer.inlineCallbacks
     def test_trivial_annotate_message(