@@ -1,4 +1,4 @@
-# Copyright 2014-2021 The Matrix.org Foundation C.I.C.
+# Copyright 2014-2022 The Matrix.org Foundation C.I.C.
 # Copyright 2020 Sorunome
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -15,9 +15,10 @@
 """Contains handlers for federation events."""
 
 import itertools
 import logging
 from http import HTTPStatus
-from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Set, Tuple, Union
 
 from signedjson.key import decode_verify_key_bytes
 from signedjson.sign import verify_signed_json
@@ -178,68 +179,24 @@ class FederationHandler:
             logger.debug("Not backfilling as no extremeties found.")
             return False
 
-        # We only want to paginate if we can actually see the events we'll get,
-        # as otherwise we'll just spend a lot of resources to get redacted
-        # events.
-        #
-        # We do this by filtering all the backwards extremities and seeing if
-        # any remain. Given we don't have the extremity events themselves, we
-        # need to actually check the events that reference them.
-        #
-        # *Note*: the spec wants us to keep backfilling until we reach the start
-        # of the room in case we are allowed to see some of the history. However
-        # in practice that causes more issues than its worth, as a) its
-        # relatively rare for there to be any visible history and b) even when
-        # there is its often sufficiently long ago that clients would stop
-        # attempting to paginate before backfill reached the visible history.
-        #
-        # TODO: If we do do a backfill then we should filter the backwards
-        # extremities to only include those that point to visible portions of
-        # history.
-        #
-        # TODO: Correctly handle the case where we are allowed to see the
-        # forward event but not the backward extremity, e.g. in the case of
-        # initial join of the server where we are allowed to see the join
-        # event but not anything before it. This would require looking at the
-        # state *before* the event, ignoring the special casing certain event
-        # types have.
-
-        forward_event_ids = await self.store.get_successor_events(
-            list(oldest_events_with_depth)
-        )
-
-        extremities_events = await self.store.get_events(
-            forward_event_ids,
-            redact_behaviour=EventRedactBehaviour.AS_IS,
-            get_prev_content=False,
-        )
-
-        # We set `check_history_visibility_only` as we might otherwise get false
-        # positives from users having been erased.
-        filtered_extremities = await filter_events_for_server(
-            self.storage,
-            self.server_name,
-            list(extremities_events.values()),
-            redact=False,
-            check_history_visibility_only=True,
+        # we now have a list of potential places to backpaginate from. We prefer to
+        # start with the most recent (ie, max depth), so let's sort the list.
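+        # (Each entry is an (event_id, depth) tuple, so sorting by negated
+        # depth puts the most recent extremity first.)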
+        sorted_extremeties_tuples: List[Tuple[str, int]] = sorted(
+            itertools.chain(
+                oldest_events_with_depth.items(),
+                insertion_events_to_be_backfilled.items(),
+            ),
+            key=lambda e: -int(e[1]),
         )
         logger.debug(
-            "_maybe_backfill_inner: filtered_extremities %s", filtered_extremities
+            "_maybe_backfill_inner: room_id: %s: current_depth: %s, limit: %s, extrems (%d): %s",
+            room_id,
+            current_depth,
+            limit,
+            len(sorted_extremeties_tuples),
+            sorted_extremeties_tuples,
         )
 
-        if not filtered_extremities and not insertion_events_to_be_backfilled:
-            return False
-
-        extremities = {
-            **oldest_events_with_depth,
-            # TODO: insertion_events_to_be_backfilled is currently skipping the filtered_extremities checks
-            **insertion_events_to_be_backfilled,
-        }
-
-        # Check if we reached a point where we should start backfilling.
-        sorted_extremeties_tuple = sorted(extremities.items(), key=lambda e: -int(e[1]))
-        max_depth = sorted_extremeties_tuple[0][1]
-
         # If we're approaching an extremity we trigger a backfill, otherwise we
         # no-op.
         #
@@ -249,6 +206,11 @@ class FederationHandler:
         # chose more than one times the limit in case of failure, but choosing a
         # much larger factor will result in triggering a backfill request much
         # earlier than necessary.
+        #
+        # XXX: shouldn't we do this *after* the filter by depth below? Again, we don't
+        # care about events that have happened after our current position.
+        #
+        max_depth = sorted_extremeties_tuples[0][1]
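+        # i.e. we only trigger a backfill once our current position is within
+        # 2 * limit of the most recent extremity (for example, within 20 events
+        # of it when the requested limit is 10).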
         if current_depth - 2 * limit > max_depth:
             logger.debug(
                 "Not backfilling as we don't need to. %d < %d - 2 * %d",
@@ -265,31 +227,98 @@ class FederationHandler:
         # 2. we have likely previously tried and failed to backfill from that
         #    extremity, so to avoid getting "stuck" requesting the same
         #    backfill repeatedly we drop those extremities.
-        filtered_sorted_extremeties_tuple = [
-            t for t in sorted_extremeties_tuple if int(t[1]) <= current_depth
-        ]
-
-        logger.debug(
-            "room_id: %s, backfill: current_depth: %s, limit: %s, max_depth: %s, extrems (%d): %s filtered_sorted_extremeties_tuple: %s",
-            room_id,
-            current_depth,
-            limit,
-            max_depth,
-            len(sorted_extremeties_tuple),
-            sorted_extremeties_tuple,
-            filtered_sorted_extremeties_tuple,
-        )
-
+        #
         # However, we need to check that the filtered extremities are non-empty.
         # If they are empty then either we can a) bail or b) still attempt to
         # backfill. We opt to try backfilling anyway just in case we do get
         # relevant events.
-        if filtered_sorted_extremeties_tuple:
-            sorted_extremeties_tuple = filtered_sorted_extremeties_tuple
+        #
+        filtered_sorted_extremeties_tuples = [
+            t for t in sorted_extremeties_tuples if int(t[1]) <= current_depth
+        ]
+        if filtered_sorted_extremeties_tuples:
+            logger.debug(
+                "_maybe_backfill_inner: extrems before current depth: %s",
+                filtered_sorted_extremeties_tuples,
+            )
+            sorted_extremeties_tuples = filtered_sorted_extremeties_tuples
+        else:
+            logger.debug(
+                "_maybe_backfill_inner: all extrems are *after* current depth. Backfilling anyway."
+            )
 
-        # We don't want to specify too many extremities as it causes the backfill
-        # request URI to be too long.
-        extremities = dict(sorted_extremeties_tuple[:5])
+        # We still need to narrow down the list of extremities we pass to the remote
+        # server. We limit to 5 of them, to avoid the request URI becoming too long.
+        #
+        # However, we only want to paginate from a particular extremity if we can
+        # actually see the events we'll get, as otherwise we'll just spend a lot of
+        # resources to get redacted events.
+        #
+        # We do this by filtering all the backwards extremities and seeing if
+        # any remain. Given we don't have the extremity events themselves, we
+        # need to actually check the events that reference them - their "successor"
+        # events.
+        #
+        # *Note*: the spec wants us to keep backfilling until we reach the start
+        # of the room in case we are allowed to see some of the history. However
+        # in practice that causes more issues than its worth, as a) its
+        # relatively rare for there to be any visible history and b) even when
+        # there is its often sufficiently long ago that clients would stop
+        # attempting to paginate before backfill reached the visible history.
+        #
+        # Calculating the visibility of each extremity is quite expensive, and there
+        # can be thousands of them in a big gappy room, so we just check them one
+        # by one until we've checked them all, or we've got 5 of them.
+        #
+        # TODO: Correctly handle the case where we are allowed to see the
+        # successor event but not the backward extremity, e.g. in the case of
+        # initial join of the server where we are allowed to see the join
+        # event but not anything before it. This would require looking at the
+        # state *before* the event, ignoring the special casing certain event
+        # types have.
+
+        extremities_to_request: Set[str] = set()
+        for extremity_event_id, _ in sorted_extremeties_tuples:
+            if len(extremities_to_request) >= 5:
+                break
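+
+            # We don't have the extremity event itself, so instead check whether
+            # the events that reference it (its successors) would be visible to
+            # us; if none are, backfilling from this extremity would only fetch
+            # events we cannot see.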
+            successor_event_ids = await self.store.get_successor_events(
+                [extremity_event_id]
+            )
+
+            successor_events = await self.store.get_events_as_list(
+                successor_event_ids,
+                redact_behaviour=EventRedactBehaviour.AS_IS,
+                get_prev_content=False,
+            )
+
+            # We set `check_history_visibility_only` as we might otherwise get false
+            # positives from users having been erased.
+            filtered_extremities = await filter_events_for_server(
+                self.storage,
+                self.server_name,
+                successor_events,
+                redact=False,
+                check_history_visibility_only=True,
+            )
+
+            if filtered_extremities:
+                extremities_to_request.add(extremity_event_id)
+            else:
+                logger.debug(
+                    "_maybe_backfill_inner: skipping extremity %s as it would not be visible.",
+                    extremity_event_id,
+                )
+
+        if not extremities_to_request:
+            logger.debug(
+                "_maybe_backfill_inner: found no extremities which would be visible"
+            )
+            return False
+
+        logger.debug(
+            "_maybe_backfill_inner: extremities_to_request %s", extremities_to_request
+        )
 
         # Now we need to decide which hosts to hit first.
 
@@ -309,7 +338,7 @@ class FederationHandler:
             for dom in domains:
                 try:
                     await self._federation_event_handler.backfill(
-                        dom, room_id, limit=100, extremities=extremities
+                        dom, room_id, limit=100, extremities=extremities_to_request
                     )
                     # If this succeeded then we probably already have the
                     # appropriate stuff.