From 8ee69f299cb3360de5c88f0c6b07525d35247fbd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 1 Jul 2019 17:55:11 +0100 Subject: [PATCH 01/80] Add basic function to get all data for a user out of synapse --- synapse/handlers/admin.py | 247 ++++++++++++++++++++++++++++++++++ synapse/storage/roommember.py | 20 +++ tests/handlers/test_admin.py | 210 +++++++++++++++++++++++++++++ tests/unittest.py | 2 +- 4 files changed, 478 insertions(+), 1 deletion(-) create mode 100644 tests/handlers/test_admin.py diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index 941ebfa107..e424fc46bd 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -14,9 +14,17 @@ # limitations under the License. import logging +import os +import tempfile + +from canonicaljson import json from twisted.internet import defer +from synapse.api.constants import Membership +from synapse.types import RoomStreamToken +from synapse.visibility import filter_events_for_client + from ._base import BaseHandler logger = logging.getLogger(__name__) @@ -89,3 +97,242 @@ class AdminHandler(BaseHandler): ret = yield self.store.search_users(term) defer.returnValue(ret) + + @defer.inlineCallbacks + def exfiltrate_user_data(self, user_id, writer): + """Write all data we have of the user to the specified directory. + + Args: + user_id (str) + writer (ExfiltrationWriter) + + Returns: + defer.Deferred + """ + # Get all rooms the user is in or has been in + rooms = yield self.store.get_rooms_for_user_where_membership_is( + user_id, + membership_list=( + Membership.JOIN, + Membership.LEAVE, + Membership.BAN, + Membership.INVITE, + ), + ) + + # We only try and fetch events for rooms the user has been in. If + # they've been e.g. invited to a room without joining then we handle + # those seperately. 
+ rooms_user_has_been_in = yield self.store.get_rooms_user_has_been_in(user_id) + + for index, room in enumerate(rooms): + room_id = room.room_id + + logger.info( + "[%s] Handling room %s, %d/%d", user_id, room_id, index + 1, len(rooms) + ) + + forgotten = yield self.store.did_forget(user_id, room_id) + if forgotten: + logger.info("[%s] User forgot room %d, ignoring", room_id) + continue + + if room_id not in rooms_user_has_been_in: + # If we haven't been in the rooms then the filtering code below + # won't return anything, so we need to handle these cases + # explicitly. + + if room.membership == Membership.INVITE: + event_id = room.event_id + invite = yield self.store.get_event(event_id, allow_none=True) + if invite: + invited_state = invite.unsigned["invite_room_state"] + writer.write_invite(room_id, invite, invited_state) + + continue + + # We only want to bother fetching events up to the last time they + # were joined. We estimate that point by looking at the + # stream_ordering of the last membership if it wasn't a join. + if room.membership == Membership.JOIN: + stream_ordering = yield self.store.get_room_max_stream_ordering() + else: + stream_ordering = room.stream_ordering + + from_key = str(RoomStreamToken(0, 0)) + to_key = str(RoomStreamToken(None, stream_ordering)) + + written_events = set() # Events that we've processed in this room + + # We need to track gaps in the events stream so that we can then + # write out the state at those events. We do this by keeping track + # of events whose prev events we haven't seen. + + # Map from event ID to prev events that haven't been processed, + # dict[str, set[str]]. + event_to_unseen_prevs = {} + + # The reverse mapping to above, i.e. map from unseen event to parent + # events. 
dict[str, set[str]] + unseen_event_to_parents = {} + + # We fetch events in the room the user could see by fetching *all* + # events that we have and then filtering, this isn't the most + # efficient method perhaps but it does guarentee we get everything. + while True: + events, _ = yield self.store.paginate_room_events( + room_id, from_key, to_key, limit=100, direction="f" + ) + if not events: + break + + from_key = events[-1].internal_metadata.after + + events = yield filter_events_for_client(self.store, user_id, events) + + writer.write_events(room_id, events) + + # Update the extremity tracking dicts + for event in events: + # Check if we have any prev events that haven't been + # processed yet, and add those to the appropriate dicts. + unseen_events = set(event.prev_event_ids()) - written_events + if unseen_events: + event_to_unseen_prevs[event.event_id] = unseen_events + for unseen in unseen_events: + unseen_event_to_parents.setdefault(unseen, set()).add( + event.event_id + ) + + # Now check if this event is an unseen prev event, if so + # then we remove this event from the appropriate dicts. + for event_id in unseen_event_to_parents.pop(event.event_id, []): + event_to_unseen_prevs.get(event_id, set()).discard( + event.event_id + ) + + written_events.add(event.event_id) + + logger.info( + "Written %d events in room %s", len(written_events), room_id + ) + + # Extremities are the events who have at least one unseen prev event. + extremities = ( + event_id + for event_id, unseen_prevs in event_to_unseen_prevs.items() + if unseen_prevs + ) + for event_id in extremities: + if not event_to_unseen_prevs[event_id]: + continue + state = yield self.store.get_state_for_event(event_id) + writer.write_state(room_id, event_id, state) + + defer.returnValue(writer.finished()) + + +class ExfiltrationWriter(object): + """Interfaced used to specify how to write exfilrated data. + """ + + def write_events(self, room_id, events): + """Write a batch of events for a room. 
+ + Args: + room_id (str) + events (list[FrozenEvent]) + """ + pass + + def write_state(self, room_id, event_id, state): + """Write the state at the given event in the room. + + This only gets called for backward extremities rather than for each + event. + + Args: + room_id (str) + event_id (str) + state (list[FrozenEvent]) + """ + pass + + def write_invite(self, room_id, event, state): + """Write an invite for the room, with associated invite state. + + Args: + room_id (str) + invite (FrozenEvent) + state (list[dict]): A subset of the state at the invite, with a + subset of the event keys (type, state_key, content and sender) + """ + + def finished(self): + """Called when exfiltration is complete, and the return valus is passed + to the requester. + """ + pass + + +class FileExfiltrationWriter(ExfiltrationWriter): + """An ExfiltrationWriter that writes the users data to a directory. + + Returns the directory location on completion. + + Args: + user_id (str): The user whose data is being exfiltrated. + directory (str|None): The directory to write the data to, if None then + will write to a temporary directory. 
+ """ + + def __init__(self, user_id, directory=None): + self.user_id = user_id + + if directory: + self.base_directory = directory + else: + self.base_directory = tempfile.mkdtemp( + prefix="synapse-exfiltrate__%s__" % (user_id,) + ) + + os.makedirs(self.base_directory, exist_ok=True) + if list(os.listdir(self.base_directory)): + raise Exception("Directory must be empty") + + def write_events(self, room_id, events): + room_directory = os.path.join(self.base_directory, "rooms", room_id) + os.makedirs(room_directory, exist_ok=True) + events_file = os.path.join(room_directory, "events") + + with open(events_file, "a") as f: + for event in events: + print(json.dumps(event.get_pdu_json()), file=f) + + def write_state(self, room_id, event_id, state): + room_directory = os.path.join(self.base_directory, "rooms", room_id) + state_directory = os.path.join(room_directory, "state") + os.makedirs(state_directory, exist_ok=True) + + event_file = os.path.join(state_directory, event_id) + + with open(event_file, "a") as f: + for event in state.values(): + print(json.dumps(event.get_pdu_json()), file=f) + + def write_invite(self, room_id, event, state): + self.write_events(room_id, [event]) + + # We write the invite state somewhere else as they aren't full events + # and are only a subset of the state at the event. 
+ room_directory = os.path.join(self.base_directory, "rooms", room_id) + os.makedirs(room_directory, exist_ok=True) + + invite_state = os.path.join(room_directory, "invite_state") + + with open(invite_state, "a") as f: + for event in state.values(): + print(json.dumps(event), file=f) + + def finished(self): + return self.base_directory diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 8004aeb909..32cfd010a5 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -575,6 +575,26 @@ class RoomMemberWorkerStore(EventsWorkerStore): count = yield self.runInteraction("did_forget_membership", f) defer.returnValue(count == 0) + @defer.inlineCallbacks + def get_rooms_user_has_been_in(self, user_id): + """Get all rooms that the user has ever been in. + + Args: + user_id (str) + + Returns: + Deferred[set[str]]: Set of room IDs. + """ + + room_ids = yield self._simple_select_onecol( + table="room_memberships", + keyvalues={"membership": Membership.JOIN, "user_id": user_id}, + retcol="room_id", + desc="get_rooms_user_has_been_in", + ) + + return set(room_ids) + class RoomMemberStore(RoomMemberWorkerStore): def __init__(self, db_conn, hs): diff --git a/tests/handlers/test_admin.py b/tests/handlers/test_admin.py new file mode 100644 index 0000000000..5e7d2d3361 --- /dev/null +++ b/tests/handlers/test_admin.py @@ -0,0 +1,210 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import Counter + +from mock import Mock + +import synapse.api.errors +import synapse.handlers.admin +import synapse.rest.admin +import synapse.storage +from synapse.api.constants import EventTypes +from synapse.rest.client.v1 import login, room + +from tests import unittest + + +class ExfiltrateData(unittest.HomeserverTestCase): + servlets = [ + synapse.rest.admin.register_servlets_for_client_rest_resource, + login.register_servlets, + room.register_servlets, + ] + + def prepare(self, reactor, clock, hs): + self.admin_handler = hs.get_handlers().admin_handler + + self.user1 = self.register_user("user1", "password") + self.token1 = self.login("user1", "password") + + self.user2 = self.register_user("user2", "password") + self.token2 = self.login("user2", "password") + + def test_single_public_joined_room(self): + """Test that we write *all* events for a public room + """ + room_id = self.helper.create_room_as( + self.user1, tok=self.token1, is_public=True + ) + self.helper.send(room_id, body="Hello!", tok=self.token1) + self.helper.join(room_id, self.user2, tok=self.token2) + self.helper.send(room_id, body="Hello again!", tok=self.token1) + + writer = Mock() + + self.get_success(self.admin_handler.exfiltrate_user_data(self.user2, writer)) + + writer.write_events.assert_called() + + # Since we can see all events there shouldn't be any extremities, so no + # state should be written + writer.write_state.assert_not_called() + + # Collect all events that were written + written_events = [] + for (called_room_id, events), _ in writer.write_events.call_args_list: + self.assertEqual(called_room_id, room_id) + written_events.extend(events) + + # Check that the right number of events were written + counter = Counter( + (event.type, getattr(event, "state_key", None)) for event in written_events + ) + self.assertEqual(counter[(EventTypes.Message, None)], 
2) + self.assertEqual(counter[(EventTypes.Member, self.user1)], 1) + self.assertEqual(counter[(EventTypes.Member, self.user2)], 1) + + def test_single_private_joined_room(self): + """Tests that we correctly write state when we can't see all events in + a room. + """ + room_id = self.helper.create_room_as(self.user1, tok=self.token1) + self.helper.send_state( + room_id, + EventTypes.RoomHistoryVisibility, + body={"history_visibility": "joined"}, + tok=self.token1, + ) + self.helper.send(room_id, body="Hello!", tok=self.token1) + self.helper.join(room_id, self.user2, tok=self.token2) + self.helper.send(room_id, body="Hello again!", tok=self.token1) + + writer = Mock() + + self.get_success(self.admin_handler.exfiltrate_user_data(self.user2, writer)) + + writer.write_events.assert_called() + + # Since we can't see all events there should be one extremity. + writer.write_state.assert_called_once() + + # Collect all events that were written + written_events = [] + for (called_room_id, events), _ in writer.write_events.call_args_list: + self.assertEqual(called_room_id, room_id) + written_events.extend(events) + + # Check that the right number of events were written + counter = Counter( + (event.type, getattr(event, "state_key", None)) for event in written_events + ) + self.assertEqual(counter[(EventTypes.Message, None)], 1) + self.assertEqual(counter[(EventTypes.Member, self.user1)], 1) + self.assertEqual(counter[(EventTypes.Member, self.user2)], 1) + + def test_single_left_room(self): + """Tests that we don't see events in the room after we leave. 
+ """ + room_id = self.helper.create_room_as(self.user1, tok=self.token1) + self.helper.send(room_id, body="Hello!", tok=self.token1) + self.helper.join(room_id, self.user2, tok=self.token2) + self.helper.send(room_id, body="Hello again!", tok=self.token1) + self.helper.leave(room_id, self.user2, tok=self.token2) + self.helper.send(room_id, body="Helloooooo!", tok=self.token1) + + writer = Mock() + + self.get_success(self.admin_handler.exfiltrate_user_data(self.user2, writer)) + + writer.write_events.assert_called() + + # Since we can see all events there shouldn't be any extremities, so no + # state should be written + writer.write_state.assert_not_called() + + written_events = [] + for (called_room_id, events), _ in writer.write_events.call_args_list: + self.assertEqual(called_room_id, room_id) + written_events.extend(events) + + # Check that the right number of events were written + counter = Counter( + (event.type, getattr(event, "state_key", None)) for event in written_events + ) + self.assertEqual(counter[(EventTypes.Message, None)], 2) + self.assertEqual(counter[(EventTypes.Member, self.user1)], 1) + self.assertEqual(counter[(EventTypes.Member, self.user2)], 2) + + def test_single_left_rejoined_private_room(self): + """Tests that see the correct events in private rooms when we + repeatedly join and leave. 
+ """ + room_id = self.helper.create_room_as(self.user1, tok=self.token1) + self.helper.send_state( + room_id, + EventTypes.RoomHistoryVisibility, + body={"history_visibility": "joined"}, + tok=self.token1, + ) + self.helper.send(room_id, body="Hello!", tok=self.token1) + self.helper.join(room_id, self.user2, tok=self.token2) + self.helper.send(room_id, body="Hello again!", tok=self.token1) + self.helper.leave(room_id, self.user2, tok=self.token2) + self.helper.send(room_id, body="Helloooooo!", tok=self.token1) + self.helper.join(room_id, self.user2, tok=self.token2) + self.helper.send(room_id, body="Helloooooo!!", tok=self.token1) + + writer = Mock() + + self.get_success(self.admin_handler.exfiltrate_user_data(self.user2, writer)) + + writer.write_events.assert_called_once() + + # Since we joined/left/joined again we expect there to be two gaps. + self.assertEqual(writer.write_state.call_count, 2) + + written_events = [] + for (called_room_id, events), _ in writer.write_events.call_args_list: + self.assertEqual(called_room_id, room_id) + written_events.extend(events) + + # Check that the right number of events were written + counter = Counter( + (event.type, getattr(event, "state_key", None)) for event in written_events + ) + self.assertEqual(counter[(EventTypes.Message, None)], 2) + self.assertEqual(counter[(EventTypes.Member, self.user1)], 1) + self.assertEqual(counter[(EventTypes.Member, self.user2)], 3) + + def test_invite(self): + """Tests that pending invites get handled correctly. 
+ """ + room_id = self.helper.create_room_as(self.user1, tok=self.token1) + self.helper.send(room_id, body="Hello!", tok=self.token1) + self.helper.invite(room_id, self.user1, self.user2, tok=self.token1) + + writer = Mock() + + self.get_success(self.admin_handler.exfiltrate_user_data(self.user2, writer)) + + writer.write_events.assert_not_called() + writer.write_state.assert_not_called() + writer.write_invite.assert_called_once() + + args = writer.write_invite.call_args[0] + self.assertEqual(args[0], room_id) + self.assertEqual(args[1].content["membership"], "invite") + self.assertTrue(args[2]) # Assert there is at least one bit of state diff --git a/tests/unittest.py b/tests/unittest.py index d26804b5b5..684d5cb1cf 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -443,7 +443,7 @@ class HomeserverTestCase(TestCase): "POST", "/_matrix/client/r0/admin/register", body.encode("utf8") ) self.render(request) - self.assertEqual(channel.code, 200) + self.assertEqual(channel.code, 200, channel.json_body) user_id = channel.json_body["user_id"] return user_id From 65dd5543f60bcd6c5137fb03ac297c97b9b16426 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 2 Jul 2019 12:10:23 +0100 Subject: [PATCH 02/80] Newsfile --- changelog.d/5589.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5589.feature diff --git a/changelog.d/5589.feature b/changelog.d/5589.feature new file mode 100644 index 0000000000..a87e669dd4 --- /dev/null +++ b/changelog.d/5589.feature @@ -0,0 +1 @@ +Add ability to pull all locally stored events out of synapse that a particular user can see. 
From 9f3c0a8556a82960c795b8dc41a4666e0adebfef Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 1 Jul 2019 17:55:26 +0100 Subject: [PATCH 03/80] Add basic admin cmd app --- synapse/app/_base.py | 17 +++- synapse/app/admin_cmd.py | 198 +++++++++++++++++++++++++++++++++++++++ synapse/config/_base.py | 48 +++++++++- 3 files changed, 257 insertions(+), 6 deletions(-) create mode 100644 synapse/app/admin_cmd.py diff --git a/synapse/app/_base.py b/synapse/app/_base.py index d50a9840d4..200978a58f 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -48,7 +48,7 @@ def register_sighup(func): _sighup_callbacks.append(func) -def start_worker_reactor(appname, config): +def start_worker_reactor(appname, config, run_command=reactor.run): """ Run the reactor in the main process Daemonizes if necessary, and then configures some resources, before starting @@ -57,6 +57,7 @@ def start_worker_reactor(appname, config): Args: appname (str): application name which will be sent to syslog config (synapse.config.Config): config object + run_command (Callable[]): callable that actually runs the reactor """ logger = logging.getLogger(config.worker_app) @@ -69,11 +70,19 @@ def start_worker_reactor(appname, config): daemonize=config.worker_daemonize, print_pidfile=config.print_pidfile, logger=logger, + run_command=run_command, ) def start_reactor( - appname, soft_file_limit, gc_thresholds, pid_file, daemonize, print_pidfile, logger + appname, + soft_file_limit, + gc_thresholds, + pid_file, + daemonize, + print_pidfile, + logger, + run_command=reactor.run, ): """ Run the reactor in the main process @@ -88,6 +97,7 @@ def start_reactor( daemonize (bool): true to run the reactor in a background process print_pidfile (bool): whether to print the pid file, if daemonize is True logger (logging.Logger): logger instance to pass to Daemonize + run_command (Callable[]): callable that actually runs the reactor """ install_dns_limiter(reactor) @@ -103,7 +113,8 @@ def start_reactor( 
change_resource_limit(soft_file_limit) if gc_thresholds: gc.set_threshold(*gc_thresholds) - reactor.run() + + run_command() if daemonize: if print_pidfile: diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py new file mode 100644 index 0000000000..bd73c47ae2 --- /dev/null +++ b/synapse/app/admin_cmd.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright 2016 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +import sys + +from twisted.internet import defer, task + +import synapse +from synapse.app import _base +from synapse.config._base import ConfigError +from synapse.config.homeserver import HomeServerConfig +from synapse.config.logger import setup_logging +from synapse.handlers.admin import FileExfiltrationWriter +from synapse.replication.slave.storage._base import BaseSlavedStore +from synapse.replication.slave.storage.account_data import SlavedAccountDataStore +from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore +from synapse.replication.slave.storage.client_ips import SlavedClientIpStore +from synapse.replication.slave.storage.deviceinbox import SlavedDeviceInboxStore +from synapse.replication.slave.storage.devices import SlavedDeviceStore +from synapse.replication.slave.storage.events import SlavedEventStore +from synapse.replication.slave.storage.filtering import SlavedFilteringStore +from synapse.replication.slave.storage.groups import SlavedGroupServerStore 
+from synapse.replication.slave.storage.presence import SlavedPresenceStore +from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore +from synapse.replication.slave.storage.receipts import SlavedReceiptsStore +from synapse.replication.slave.storage.registration import SlavedRegistrationStore +from synapse.replication.slave.storage.room import RoomStore +from synapse.replication.tcp.client import ReplicationClientHandler +from synapse.server import HomeServer +from synapse.storage.engines import create_engine +from synapse.util.logcontext import LoggingContext +from synapse.util.versionstring import get_version_string + +logger = logging.getLogger("synapse.app.admin_cmd") + + +class AdminCmdSlavedStore( + SlavedReceiptsStore, + SlavedAccountDataStore, + SlavedApplicationServiceStore, + SlavedRegistrationStore, + SlavedFilteringStore, + SlavedPresenceStore, + SlavedGroupServerStore, + SlavedDeviceInboxStore, + SlavedDeviceStore, + SlavedPushRuleStore, + SlavedEventStore, + SlavedClientIpStore, + RoomStore, + BaseSlavedStore, +): + pass + + +class AdminCmdServer(HomeServer): + DATASTORE_CLASS = AdminCmdSlavedStore + + def _listen_http(self, listener_config): + pass + + def start_listening(self, listeners): + pass + + def build_tcp_replication(self): + return AdminCmdReplicationHandler(self) + + +class AdminCmdReplicationHandler(ReplicationClientHandler): + @defer.inlineCallbacks + def on_rdata(self, stream_name, token, rows): + pass + + def get_streams_to_replicate(self): + return {} + + +@defer.inlineCallbacks +def export_data_command(hs, user_id, directory): + """Export data for a user. + + Args: + user_id (str) + directory (str|None): Directory to write output to. Will create a temp + directory if not specified. 
+ """ + + res = yield hs.get_handlers().admin_handler.exfiltrate_user_data( + user_id, FileExfiltrationWriter(user_id, directory=directory) + ) + print(res) + + +def start(config_options): + parser = HomeServerConfig.create_argument_parser("Synapse Admin Command") + + subparser = parser.add_subparsers( + title="Admin Commands", + description="Choose and admin command to perform.", + required=True, + dest="command", + metavar="", + help="The admin command to perform.", + ) + export_data_parser = subparser.add_parser( + "export-data", help="Export all data for a user" + ) + export_data_parser.add_argument("user_id", help="User to extra data from") + export_data_parser.add_argument( + "--output-directory", + action="store", + metavar="DIRECTORY", + required=False, + help="The directory to store the exported data in. Must be emtpy. Defaults" + " to creating a temp directory.", + ) + + try: + config, args = HomeServerConfig.load_config_with_parser(parser, config_options) + except ConfigError as e: + sys.stderr.write("\n" + str(e) + "\n") + sys.exit(1) + + if config.worker_app is not None: + assert config.worker_app == "synapse.app.admin_cmd" + + # Update the config with some basic overrides so that don't have to specify + # a full worker config. + config.worker_app = "synapse.app.admin_cmd" + + if ( + not config.worker_daemonize + and not config.worker_log_file + and not config.worker_log_config + ): + # Since we're meant to be run as a "command" let's not redirect stdio + # unless we've actually set log config. 
+ config.no_redirect_stdio = True + + # Explicitly disable background processes + config.update_user_directory = False + config.start_pushers = False + config.send_federation = False + + setup_logging(config, use_worker_options=True) + + synapse.events.USE_FROZEN_DICTS = config.use_frozen_dicts + + database_engine = create_engine(config.database_config) + + ss = AdminCmdServer( + config.server_name, + db_config=config.database_config, + config=config, + version_string="Synapse/" + get_version_string(synapse), + database_engine=database_engine, + ) + + ss.setup() + + if args.command == "export-data": + command = lambda: export_data_command(ss, args.user_id, args.output_directory) + else: + # This shouldn't happen. + raise ConfigError("Unknown admin command %s" % (args.command,)) + + # We use task.react as the basic run command as it correctly handles tearing + # down the reactor when the deferreds resolve and setting the return value. + # We also make sure that `_base.start` gets run before we actually run the + # command. + + @defer.inlineCallbacks + def run(_reactor): + with LoggingContext("command"): + yield _base.start(ss, []) + yield command() + + _base.start_worker_reactor( + "synapse-admin-cmd", config, run_command=lambda: task.react(run) + ) + + +if __name__ == "__main__": + with LoggingContext("main"): + start(sys.argv[1:]) diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 965478d8d5..14d3f7c1fe 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -201,6 +201,26 @@ class Config(object): Returns: Config object. """ + config_parser = cls.create_argument_parser(description) + obj, _ = cls.load_config_with_parser(config_parser, argv) + + return obj + + @classmethod + def create_argument_parser(cls, description): + """Create an ArgumentParser instance with all the config flags. + + Doesn't support config-file-generation: used by the worker apps. + + Used for workers where we want to add extra flags/subcommands. 
+ + Args: + description (str): App description + + Returns: + ArgumentParser + """ + config_parser = argparse.ArgumentParser(description=description) config_parser.add_argument( "-c", @@ -219,9 +239,31 @@ class Config(object): " Defaults to the directory containing the last config file", ) - obj = cls() + # We can only invoke `add_arguments` on an actual object, but + # `add_arguments` should be side effect free so this is probably fine. + cls().invoke_all("add_arguments", config_parser) - obj.invoke_all("add_arguments", config_parser) + return config_parser + + @classmethod + def load_config_with_parser(cls, config_parser, argv): + """Parse the commandline and config files with the given parser + + Doesn't support config-file-generation: used by the worker apps. + + Used for workers where we want to add extra flags/subcommands. + + Args: + conifg_parser (ArgumentParser) + argv (list[str]) + + Returns: + tuple[HomeServerConfig, argparse.Namespace]: Returns the parsed + config object and the parsed argparse.Namespace object from + `config_parser.parse_args(..)` + """ + + obj = cls() config_args = config_parser.parse_args(argv) @@ -244,7 +286,7 @@ class Config(object): obj.invoke_all("read_arguments", config_args) - return obj + return obj, config_args @classmethod def load_or_generate_config(cls, description, argv): From 10fe904d88ae19d4214a249d9b24a99eb7edb618 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 2 Jul 2019 17:21:27 +0100 Subject: [PATCH 04/80] Newsfile --- changelog.d/5597.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5597.feature diff --git a/changelog.d/5597.feature b/changelog.d/5597.feature new file mode 100644 index 0000000000..6f92748885 --- /dev/null +++ b/changelog.d/5597.feature @@ -0,0 +1 @@ +Add a basic admin command app to allow server operators to run Synapse admin commands separately from the main production instance. 
From d0b849c86d93ace21bdf7f73e1411f33a9e1b2fe Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 3 Jul 2019 15:03:38 +0100 Subject: [PATCH 05/80] Apply comment fixups from code review Co-Authored-By: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/handlers/admin.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index e424fc46bd..6c905e97a7 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -100,7 +100,7 @@ class AdminHandler(BaseHandler): @defer.inlineCallbacks def exfiltrate_user_data(self, user_id, writer): - """Write all data we have of the user to the specified directory. + """Write all data we have on the user to the given writer. Args: user_id (str) @@ -178,7 +178,7 @@ class AdminHandler(BaseHandler): # We fetch events in the room the user could see by fetching *all* # events that we have and then filtering, this isn't the most - # efficient method perhaps but it does guarentee we get everything. + # efficient method perhaps but it does guarantee we get everything. while True: events, _ = yield self.store.paginate_room_events( room_id, from_key, to_key, limit=100, direction="f" @@ -233,7 +233,7 @@ class AdminHandler(BaseHandler): class ExfiltrationWriter(object): - """Interfaced used to specify how to write exfilrated data. + """Interface used to specify how to write exfiltrated data. """ def write_events(self, room_id, events): @@ -263,7 +263,7 @@ class ExfiltrationWriter(object): Args: room_id (str) - invite (FrozenEvent) + event (FrozenEvent) state (list[dict]): A subset of the state at the invite, with a subset of the event keys (type, state_key, content and sender) """ @@ -276,13 +276,13 @@ class ExfiltrationWriter(object): class FileExfiltrationWriter(ExfiltrationWriter): - """An ExfiltrationWriter that writes the users data to a directory. + """An ExfiltrationWriter that writes the user's data to a directory. 
Returns the directory location on completion. Args: user_id (str): The user whose data is being exfiltrated. - directory (str|None): The directory to write the data to, if None then + directory (str|None): The directory to write the data to. If None then will write to a temporary directory. """ From c061d4f237273f3400dc8e62aa7421f02caec3dd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 4 Jul 2019 11:07:09 +0100 Subject: [PATCH 06/80] Fixup from review comments. --- synapse/handlers/admin.py | 39 ++++++++++++++++++++---------------- tests/handlers/test_admin.py | 10 ++++----- 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index 6c905e97a7..69d2c8c36f 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -99,7 +99,7 @@ class AdminHandler(BaseHandler): defer.returnValue(ret) @defer.inlineCallbacks - def exfiltrate_user_data(self, user_id, writer): + def export_user_data(self, user_id, writer): """Write all data we have on the user to the given writer. Args: @@ -107,7 +107,8 @@ class AdminHandler(BaseHandler): writer (ExfiltrationWriter) Returns: - defer.Deferred + defer.Deferred: Resolves when all data for a user has been written. + The returned value is that returned by `writer.finished()`. """ # Get all rooms the user is in or has been in rooms = yield self.store.get_rooms_for_user_where_membership_is( @@ -134,7 +135,7 @@ class AdminHandler(BaseHandler): forgotten = yield self.store.did_forget(user_id, room_id) if forgotten: - logger.info("[%s] User forgot room %d, ignoring", room_id) + logger.info("[%s] User forgot room %d, ignoring", user_id, room_id) continue if room_id not in rooms_user_has_been_in: @@ -172,9 +173,10 @@ class AdminHandler(BaseHandler): # dict[str, set[str]]. event_to_unseen_prevs = {} - # The reverse mapping to above, i.e. map from unseen event to parent - # events. 
dict[str, set[str]] - unseen_event_to_parents = {} + # The reverse mapping to above, i.e. map from unseen event to events + # that have the unseen event in their prev_events, i.e. the unseen + # events "children". dict[str, set[str]] + unseen_to_child_events = {} # We fetch events in the room the user could see by fetching *all* # events that we have and then filtering, this isn't the most @@ -200,14 +202,14 @@ class AdminHandler(BaseHandler): if unseen_events: event_to_unseen_prevs[event.event_id] = unseen_events for unseen in unseen_events: - unseen_event_to_parents.setdefault(unseen, set()).add( + unseen_to_child_events.setdefault(unseen, set()).add( event.event_id ) # Now check if this event is an unseen prev event, if so # then we remove this event from the appropriate dicts. - for event_id in unseen_event_to_parents.pop(event.event_id, []): - event_to_unseen_prevs.get(event_id, set()).discard( + for child_id in unseen_to_child_events.pop(event.event_id, []): + event_to_unseen_prevs.get(child_id, set()).discard( event.event_id ) @@ -233,7 +235,7 @@ class AdminHandler(BaseHandler): class ExfiltrationWriter(object): - """Interface used to specify how to write exfiltrated data. + """Interface used to specify how to write exported data. """ def write_events(self, room_id, events): @@ -254,7 +256,7 @@ class ExfiltrationWriter(object): Args: room_id (str) event_id (str) - state (list[FrozenEvent]) + state (dict[tuple[str, str], FrozenEvent]) """ pass @@ -264,13 +266,16 @@ class ExfiltrationWriter(object): Args: room_id (str) event (FrozenEvent) - state (list[dict]): A subset of the state at the invite, with a - subset of the event keys (type, state_key, content and sender) + state (dict[tuple[str, str], dict]): A subset of the state at the + invite, with a subset of the event keys (type, state_key + content and sender) """ def finished(self): - """Called when exfiltration is complete, and the return valus is passed - to the requester. 
+ """Called when all data has succesfully been exported and written. + + This functions return value is passed to the caller of + `export_user_data`. """ pass @@ -281,7 +286,7 @@ class FileExfiltrationWriter(ExfiltrationWriter): Returns the directory location on completion. Args: - user_id (str): The user whose data is being exfiltrated. + user_id (str): The user whose data is being exported. directory (str|None): The directory to write the data to. If None then will write to a temporary directory. """ @@ -293,7 +298,7 @@ class FileExfiltrationWriter(ExfiltrationWriter): self.base_directory = directory else: self.base_directory = tempfile.mkdtemp( - prefix="synapse-exfiltrate__%s__" % (user_id,) + prefix="synapse-exported__%s__" % (user_id,) ) os.makedirs(self.base_directory, exist_ok=True) diff --git a/tests/handlers/test_admin.py b/tests/handlers/test_admin.py index 5e7d2d3361..fc37c4328c 100644 --- a/tests/handlers/test_admin.py +++ b/tests/handlers/test_admin.py @@ -55,7 +55,7 @@ class ExfiltrateData(unittest.HomeserverTestCase): writer = Mock() - self.get_success(self.admin_handler.exfiltrate_user_data(self.user2, writer)) + self.get_success(self.admin_handler.export_user_data(self.user2, writer)) writer.write_events.assert_called() @@ -94,7 +94,7 @@ class ExfiltrateData(unittest.HomeserverTestCase): writer = Mock() - self.get_success(self.admin_handler.exfiltrate_user_data(self.user2, writer)) + self.get_success(self.admin_handler.export_user_data(self.user2, writer)) writer.write_events.assert_called() @@ -127,7 +127,7 @@ class ExfiltrateData(unittest.HomeserverTestCase): writer = Mock() - self.get_success(self.admin_handler.exfiltrate_user_data(self.user2, writer)) + self.get_success(self.admin_handler.export_user_data(self.user2, writer)) writer.write_events.assert_called() @@ -169,7 +169,7 @@ class ExfiltrateData(unittest.HomeserverTestCase): writer = Mock() - self.get_success(self.admin_handler.exfiltrate_user_data(self.user2, writer)) + 
self.get_success(self.admin_handler.export_user_data(self.user2, writer)) writer.write_events.assert_called_once() @@ -198,7 +198,7 @@ class ExfiltrateData(unittest.HomeserverTestCase): writer = Mock() - self.get_success(self.admin_handler.exfiltrate_user_data(self.user2, writer)) + self.get_success(self.admin_handler.export_user_data(self.user2, writer)) writer.write_events.assert_not_called() writer.write_state.assert_not_called() From 9ccea16d45416397b37fa407709ff455bca415e3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 5 Jul 2019 14:07:56 +0100 Subject: [PATCH 07/80] Assume key existence. Update docstrings --- synapse/handlers/admin.py | 4 +--- synapse/storage/stream.py | 16 ++++++++++------ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index 69d2c8c36f..f06914a378 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -209,9 +209,7 @@ class AdminHandler(BaseHandler): # Now check if this event is an unseen prev event, if so # then we remove this event from the appropriate dicts. for child_id in unseen_to_child_events.pop(event.event_id, []): - event_to_unseen_prevs.get(child_id, set()).discard( - event.event_id - ) + event_to_unseen_prevs[child_id].discard(event.event_id) written_events.add(event.event_id) diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index d9482a3843..7b5b3b8c8d 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -833,7 +833,9 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): Returns: Deferred[tuple[list[_EventDictReturn], str]]: Returns the results as a list of _EventDictReturn and a token that points to the end - of the result set. + of the result set. If no events are returned then the end of the + stream has been reached (i.e. there are no events between + `from_token` and `to_token`). 
""" assert int(limit) >= 0 @@ -905,15 +907,17 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): only those before direction(char): Either 'b' or 'f' to indicate whether we are paginating forwards or backwards from `from_key`. - limit (int): The maximum number of events to return. Zero or less - means no limit. + limit (int): The maximum number of events to return. event_filter (Filter|None): If provided filters the events to those that match the filter. Returns: - tuple[list[dict], str]: Returns the results as a list of dicts and - a token that points to the end of the result set. The dicts have - the keys "event_id", "topological_ordering" and "stream_orderign". + tuple[list[FrozenEvents], str]: Returns the results as a list of + dicts and a token that points to the end of the result set. The + dicts have the keys "event_id", "topological_ordering" and + "stream_ordering". If no events are returned then the end of the + stream has been reached (i.e. there are no events between + `from_key` and `to_key`). """ from_key = RoomStreamToken.parse(from_key) From eadb13d2e9caaa391f4efe2609a7d54d1723d311 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 5 Jul 2019 14:15:00 +0100 Subject: [PATCH 08/80] Remove FileExfiltrationWriter --- synapse/handlers/admin.py | 63 --------------------------------------- 1 file changed, 63 deletions(-) diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index f06914a378..5ff02c12bf 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -276,66 +276,3 @@ class ExfiltrationWriter(object): `export_user_data`. """ pass - - -class FileExfiltrationWriter(ExfiltrationWriter): - """An ExfiltrationWriter that writes the user's data to a directory. - - Returns the directory location on completion. - - Args: - user_id (str): The user whose data is being exported. - directory (str|None): The directory to write the data to. If None then - will write to a temporary directory. 
- """ - - def __init__(self, user_id, directory=None): - self.user_id = user_id - - if directory: - self.base_directory = directory - else: - self.base_directory = tempfile.mkdtemp( - prefix="synapse-exported__%s__" % (user_id,) - ) - - os.makedirs(self.base_directory, exist_ok=True) - if list(os.listdir(self.base_directory)): - raise Exception("Directory must be empty") - - def write_events(self, room_id, events): - room_directory = os.path.join(self.base_directory, "rooms", room_id) - os.makedirs(room_directory, exist_ok=True) - events_file = os.path.join(room_directory, "events") - - with open(events_file, "a") as f: - for event in events: - print(json.dumps(event.get_pdu_json()), file=f) - - def write_state(self, room_id, event_id, state): - room_directory = os.path.join(self.base_directory, "rooms", room_id) - state_directory = os.path.join(room_directory, "state") - os.makedirs(state_directory, exist_ok=True) - - event_file = os.path.join(state_directory, event_id) - - with open(event_file, "a") as f: - for event in state.values(): - print(json.dumps(event.get_pdu_json()), file=f) - - def write_invite(self, room_id, event, state): - self.write_events(room_id, [event]) - - # We write the invite state somewhere else as they aren't full events - # and are only a subset of the state at the event. 
- room_directory = os.path.join(self.base_directory, "rooms", room_id) - os.makedirs(room_directory, exist_ok=True) - - invite_state = os.path.join(room_directory, "invite_state") - - with open(invite_state, "a") as f: - for event in state.values(): - print(json.dumps(event), file=f) - - def finished(self): - return self.base_directory From b4f5416dd9bd7635a4b859e3d13eaee992096ef7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 5 Jul 2019 14:41:29 +0100 Subject: [PATCH 09/80] pep8 --- synapse/handlers/admin.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index 5ff02c12bf..e8a651e231 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -14,10 +14,6 @@ # limitations under the License. import logging -import os -import tempfile - -from canonicaljson import json from twisted.internet import defer From 1a807dfe6855b15d8574eb92541dd84b946f16bd Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 8 Jul 2019 14:19:39 +0100 Subject: [PATCH 10/80] Use application/json when querying the IS's /store-invite endpoint --- synapse/handlers/room_member.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 4d6e883802..b050967b8f 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -29,7 +29,7 @@ from twisted.internet import defer import synapse.server import synapse.types from synapse.api.constants import EventTypes, Membership -from synapse.api.errors import AuthError, Codes, SynapseError +from synapse.api.errors import AuthError, Codes, HttpResponseException, SynapseError from synapse.types import RoomID, UserID from synapse.util.async_helpers import Linearizer from synapse.util.distributor import user_joined_room, user_left_room @@ -904,9 +904,22 @@ class RoomMemberHandler(object): } ) - data = yield 
self.simple_http_client.post_urlencoded_get_json( - is_url, invite_config - ) + try: + data = yield self.simple_http_client.post_json_get_json( + is_url, invite_config + ) + except HttpResponseException as e: + # Some identity servers may only support application/x-www-form-urlencoded + # types. This is especially true with old instances of Sydent, see + # https://github.com/matrix-org/sydent/pull/170 + logger.info( + "Failed to POST %s with JSON, falling back to urlencoded form: %s", + is_url, e, + ) + data = yield self.simple_http_client.post_urlencoded_get_json( + is_url, invite_config + ) + # TODO: Check for success token = data["token"] public_keys = data.get("public_keys", []) From f05c7d62bc2bb504f0048adf6e68decdbe31f80d Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 8 Jul 2019 14:29:27 +0100 Subject: [PATCH 11/80] Lint --- synapse/handlers/room_member.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index b050967b8f..c3420b4b22 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -914,7 +914,8 @@ class RoomMemberHandler(object): # https://github.com/matrix-org/sydent/pull/170 logger.info( "Failed to POST %s with JSON, falling back to urlencoded form: %s", - is_url, e, + is_url, + e, ) data = yield self.simple_http_client.post_urlencoded_get_json( is_url, invite_config From c142e5d16ad4dc5a4cbc93d7d19b2902be72644b Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 8 Jul 2019 14:29:37 +0100 Subject: [PATCH 12/80] Changelog --- changelog.d/5638.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5638.bugfix diff --git a/changelog.d/5638.bugfix b/changelog.d/5638.bugfix new file mode 100644 index 0000000000..66781ad9e6 --- /dev/null +++ b/changelog.d/5638.bugfix @@ -0,0 +1 @@ +Fix requests to the `/store_invite` endpoint of identity servers being sent in the wrong format. 
From 7556851665ce623ce49f6fd8eaf22c1b4f234b9d Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 8 Jul 2019 17:31:00 +0100 Subject: [PATCH 13/80] Allow newly-registered users to lookup their own profiles When a user creates an account and the 'require_auth_for_profile_requests' config flag is set, and a client that performed the registration wants to lookup the newly-created profile, the request will be denied because the user doesn't share a room with themselves yet. --- synapse/handlers/profile.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index d8462b75ec..a2388a7091 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -303,6 +303,10 @@ class BaseProfileHandler(BaseHandler): if not self.hs.config.require_auth_for_profile_requests or not requester: return + # Always allow the user to query their own profile. + if target_user.to_string() == requester.to_string(): + return + try: requester_rooms = yield self.store.get_rooms_for_user(requester.to_string()) target_user_rooms = yield self.store.get_rooms_for_user( From f3615a8aa5f93438629749446a49078aa487f11a Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 8 Jul 2019 17:31:58 +0100 Subject: [PATCH 14/80] Changelog --- changelog.d/5644.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5644.bugfix diff --git a/changelog.d/5644.bugfix b/changelog.d/5644.bugfix new file mode 100644 index 0000000000..f6302fd08d --- /dev/null +++ b/changelog.d/5644.bugfix @@ -0,0 +1 @@ +Fix newly-registered users not being able to lookup their own profile without joining a room. 
From 5e01e9ac1914cff89d54350df5270c1a2b7ccc42 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 8 Jul 2019 17:41:16 +0100 Subject: [PATCH 15/80] Add test case --- tests/rest/client/v1/test_profile.py | 47 ++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tests/rest/client/v1/test_profile.py b/tests/rest/client/v1/test_profile.py index dff9b2f10c..a76dda9503 100644 --- a/tests/rest/client/v1/test_profile.py +++ b/tests/rest/client/v1/test_profile.py @@ -288,3 +288,50 @@ class ProfilesRestrictedTestCase(unittest.HomeserverTestCase): # if the user isn't already in the room), because we only want to # make sure the user isn't in the room. pass + + +class OwnProfileUnrestrictedTestCase(unittest.HomeserverTestCase): + + servlets = [ + admin.register_servlets_for_client_rest_resource, + login.register_servlets, + profile.register_servlets, + ] + + def make_homeserver(self, reactor, clock): + config = self.default_config() + config["require_auth_for_profile_requests"] = True + self.hs = self.setup_test_homeserver(config=config) + + return self.hs + + def prepare(self, reactor, clock, hs): + # User requesting the profile. + self.requester = self.register_user("requester", "pass") + self.requester_tok = self.login("requester", "pass") + + def test_can_lookup_own_profile(self): + """Tests that a user can lookup their own profile without having to be in a room + if 'require_auth_for_profile_requests' is set to true in the server's config. 
+ """ + request, channel = self.make_request( + "GET", "/profile/" + self.requester, access_token=self.requester_tok + ) + self.render(request) + self.assertEqual(channel.code, 200, channel.result) + + request, channel = self.make_request( + "GET", + "/profile/" + self.requester + "/displayname", + access_token=self.requester_tok + ) + self.render(request) + self.assertEqual(channel.code, 200, channel.result) + + request, channel = self.make_request( + "GET", + "/profile/" + self.requester + "/avatar_url", + access_token=self.requester_tok + ) + self.render(request) + self.assertEqual(channel.code, 200, channel.result) From 73cb716b3c97f018efe00c6ca7a80b7c6d48c0e1 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 8 Jul 2019 17:44:20 +0100 Subject: [PATCH 16/80] Lint --- tests/rest/client/v1/test_profile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/rest/client/v1/test_profile.py b/tests/rest/client/v1/test_profile.py index a76dda9503..140d8b3772 100644 --- a/tests/rest/client/v1/test_profile.py +++ b/tests/rest/client/v1/test_profile.py @@ -323,7 +323,7 @@ class OwnProfileUnrestrictedTestCase(unittest.HomeserverTestCase): request, channel = self.make_request( "GET", "/profile/" + self.requester + "/displayname", - access_token=self.requester_tok + access_token=self.requester_tok, ) self.render(request) self.assertEqual(channel.code, 200, channel.result) @@ -331,7 +331,7 @@ class OwnProfileUnrestrictedTestCase(unittest.HomeserverTestCase): request, channel = self.make_request( "GET", "/profile/" + self.requester + "/avatar_url", - access_token=self.requester_tok + access_token=self.requester_tok, ) self.render(request) self.assertEqual(channel.code, 200, channel.result) From d88421ab03e72a6c7f69ca38a57b4b6212f1bc82 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Tue, 9 Jul 2019 13:43:08 +0100 Subject: [PATCH 17/80] Include the original event in /relations (#5626) When 
asking for the relations of an event, include the original event in the response. This will mostly be used for efficiently showing edit history, but could be useful in other circumstances. --- changelog.d/5626.feature | 1 + synapse/rest/client/v2_alpha/relations.py | 8 +++++--- synapse/storage/relations.py | 2 +- tests/rest/client/v2_alpha/test_relations.py | 5 +++++ 4 files changed, 12 insertions(+), 4 deletions(-) create mode 100644 changelog.d/5626.feature diff --git a/changelog.d/5626.feature b/changelog.d/5626.feature new file mode 100644 index 0000000000..5ef793b943 --- /dev/null +++ b/changelog.d/5626.feature @@ -0,0 +1 @@ +Include the original event when asking for its relations. diff --git a/synapse/rest/client/v2_alpha/relations.py b/synapse/rest/client/v2_alpha/relations.py index 8e362782cc..458afd135f 100644 --- a/synapse/rest/client/v2_alpha/relations.py +++ b/synapse/rest/client/v2_alpha/relations.py @@ -145,9 +145,9 @@ class RelationPaginationServlet(RestServlet): room_id, requester.user.to_string() ) - # This checks that a) the event exists and b) the user is allowed to - # view it. - yield self.event_handler.get_event(requester.user, room_id, parent_id) + # This gets the original event and checks that a) the event exists and + # b) the user is allowed to view it. 
+ event = yield self.event_handler.get_event(requester.user, room_id, parent_id) limit = parse_integer(request, "limit", default=5) from_token = parse_string(request, "from") @@ -173,10 +173,12 @@ class RelationPaginationServlet(RestServlet): ) now = self.clock.time_msec() + original_event = yield self._event_serializer.serialize_event(event, now) events = yield self._event_serializer.serialize_events(events, now) return_value = result.to_dict() return_value["chunk"] = events + return_value["original_event"] = original_event defer.returnValue((200, return_value)) diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py index 1b01934c19..9954bc094f 100644 --- a/synapse/storage/relations.py +++ b/synapse/storage/relations.py @@ -60,7 +60,7 @@ class PaginationChunk(object): class RelationPaginationToken(object): """Pagination token for relation pagination API. - As the results are order by topological ordering, we can use the + As the results are in topological order, we can use the `topological_ordering` and `stream_ordering` fields of the events at the boundaries of the chunk as pagination tokens. diff --git a/tests/rest/client/v2_alpha/test_relations.py b/tests/rest/client/v2_alpha/test_relations.py index 6bb7d92638..58c6951852 100644 --- a/tests/rest/client/v2_alpha/test_relations.py +++ b/tests/rest/client/v2_alpha/test_relations.py @@ -126,6 +126,11 @@ class RelationsTestCase(unittest.HomeserverTestCase): channel.json_body["chunk"][0], ) + # We also expect to get the original event (the id of which is self.parent_id) + self.assertEquals( + channel.json_body["original_event"]["event_id"], self.parent_id + ) + # Make sure next_batch has something in it that looks like it could be a # valid token. 
self.assertIsInstance( From 7b3bc755a34cf97138e614379234cfc47f91a5a9 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Tue, 9 Jul 2019 13:37:39 -0400 Subject: [PATCH 18/80] remove unused and unnecessary check for FederationDeniedError (#5645) FederationDeniedError is a subclass of SynapseError, which is a subclass of CodeMessageException, so if e is a FederationDeniedError, then this check for FederationDeniedError will never be reached since it will be caught by the check for CodeMessageException above. The check for CodeMessageException does almost the same thing as this check (since FederationDeniedError initialises with code=403 and msg="Federation denied with %s."), so may as well just keep allowing it to handle this case. --- changelog.d/5645.misc | 1 + synapse/handlers/e2e_keys.py | 5 +---- 2 files changed, 2 insertions(+), 4 deletions(-) create mode 100644 changelog.d/5645.misc diff --git a/changelog.d/5645.misc b/changelog.d/5645.misc new file mode 100644 index 0000000000..4fa9699e4f --- /dev/null +++ b/changelog.d/5645.misc @@ -0,0 +1 @@ +Remove unused and unnecessary check for FederationDeniedError in _exception_to_failure. 
\ No newline at end of file diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 55b4ab3a1a..fdfe8611b6 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -22,7 +22,7 @@ from canonicaljson import encode_canonical_json, json from twisted.internet import defer -from synapse.api.errors import CodeMessageException, FederationDeniedError, SynapseError +from synapse.api.errors import CodeMessageException, SynapseError from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.types import UserID, get_domain_from_id from synapse.util.retryutils import NotRetryingDestination @@ -350,9 +350,6 @@ def _exception_to_failure(e): if isinstance(e, NotRetryingDestination): return {"status": 503, "message": "Not ready for retry"} - if isinstance(e, FederationDeniedError): - return {"status": 403, "message": "Federation Denied"} - # include ConnectionRefused and other errors # # Note that some Exceptions (notably twisted's ResponseFailed etc) don't From 4d122d295c50ae933def7880d8d9d1e1a7846e6c Mon Sep 17 00:00:00 2001 From: Bruno Windels Date: Wed, 10 Jul 2019 12:55:24 +0000 Subject: [PATCH 19/80] Correct pep517 flag in readme (#5651) --- README.rst | 2 +- changelog.d/5651.doc | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/5651.doc diff --git a/README.rst b/README.rst index 13e11a5773..bbff8de5ab 100644 --- a/README.rst +++ b/README.rst @@ -272,7 +272,7 @@ to install using pip and a virtualenv:: virtualenv -p python3 env source env/bin/activate - python -m pip install --no-pep-517 -e .[all] + python -m pip install --no-use-pep517 -e .[all] This will run a process of downloading and installing all the needed dependencies into a virtual env. 
diff --git a/changelog.d/5651.doc b/changelog.d/5651.doc new file mode 100644 index 0000000000..e2d5a8dc8a --- /dev/null +++ b/changelog.d/5651.doc @@ -0,0 +1 @@ +--no-pep517 should be --no-use-pep517 in the documentation to setup the development environment. From 3dd61d12cdd66000b9cf078f8f485c0c40e4235e Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Wed, 10 Jul 2019 14:03:18 +0100 Subject: [PATCH 20/80] Add a linting script (#5627) Add a dev script to cover all the different linting steps. --- changelog.d/5627.misc | 1 + scripts-dev/lint.sh | 12 ++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 changelog.d/5627.misc create mode 100755 scripts-dev/lint.sh diff --git a/changelog.d/5627.misc b/changelog.d/5627.misc new file mode 100644 index 0000000000..730721b5ef --- /dev/null +++ b/changelog.d/5627.misc @@ -0,0 +1 @@ +Add `lint.sh` to the scripts-dev folder which will run all linting steps required by CI. diff --git a/scripts-dev/lint.sh b/scripts-dev/lint.sh new file mode 100755 index 0000000000..ebb4d69f86 --- /dev/null +++ b/scripts-dev/lint.sh @@ -0,0 +1,12 @@ +#!/bin/sh +# +# Runs linting scripts over the local Synapse checkout +# isort - sorts import statements +# flake8 - lints and finds mistakes +# black - opinionated code formatter + +set -e + +isort -y -rc synapse tests scripts-dev scripts +flake8 synapse tests +python3 -m black synapse tests scripts-dev scripts From f28171458342c474e7a091fff022972afb366169 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Wed, 10 Jul 2019 14:43:11 +0100 Subject: [PATCH 21/80] Don't bundle aggregations when retrieving the original event (#5654) A fix for PR #5626, which returned the original event content as part of a call to /relations. Only problem was that we were attempting to aggregate the relations on top of it when we did so. We now set bundle_aggregations to False in the get_event call. 
We also do this when pulling the relation events as well, because edits of edits are not something we'd like to support here. --- changelog.d/5654.bugfix | 1 + synapse/rest/client/v2_alpha/relations.py | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 changelog.d/5654.bugfix diff --git a/changelog.d/5654.bugfix b/changelog.d/5654.bugfix new file mode 100644 index 0000000000..5f76b041cd --- /dev/null +++ b/changelog.d/5654.bugfix @@ -0,0 +1 @@ +Fix bug in #5626 that prevented the original_event field from actually having the contents of the original event in a call to `/relations`. \ No newline at end of file diff --git a/synapse/rest/client/v2_alpha/relations.py b/synapse/rest/client/v2_alpha/relations.py index 458afd135f..7ce485b471 100644 --- a/synapse/rest/client/v2_alpha/relations.py +++ b/synapse/rest/client/v2_alpha/relations.py @@ -173,8 +173,18 @@ class RelationPaginationServlet(RestServlet): ) now = self.clock.time_msec() - original_event = yield self._event_serializer.serialize_event(event, now) - events = yield self._event_serializer.serialize_events(events, now) + # We set bundle_aggregations to False when retrieving the original + # event because we want the content before relations were applied to + # it. + original_event = yield self._event_serializer.serialize_event( + event, now, bundle_aggregations=False + ) + # Similarly, we don't allow relations to be applied to relations, so we + # return the original relations without any aggregations on top of them + # here. 
+ events = yield self._event_serializer.serialize_events( + events, now, bundle_aggregations=False + ) return_value = result.to_dict() return_value["chunk"] = events From f77e99761919db671960aae4792cb563ad2b8e53 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 10 Jul 2019 15:46:42 +0100 Subject: [PATCH 22/80] Send 3PID bind requests as JSON data --- changelog.d/5656.bugfix | 1 + synapse/handlers/identity.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/5656.bugfix diff --git a/changelog.d/5656.bugfix b/changelog.d/5656.bugfix new file mode 100644 index 0000000000..f6ae906a9a --- /dev/null +++ b/changelog.d/5656.bugfix @@ -0,0 +1 @@ +Fix 3PID bind requests being sent to identity servers as `application/x-form-www-urlencoded` data, which is deprecated. diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index c82b1933f2..ee6c2c4f8b 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -118,7 +118,7 @@ class IdentityHandler(BaseHandler): raise SynapseError(400, "No client_secret in creds") try: - data = yield self.http_client.post_urlencoded_get_json( + data = yield self.http_client.post_post_get_json( "https://%s%s" % (id_server, "/_matrix/identity/api/v1/3pid/bind"), {"sid": creds["sid"], "client_secret": client_secret, "mxid": mxid}, ) From 351d9bd3179dac7a4ad0f25b88e4314aab4527cd Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 10 Jul 2019 15:48:50 +0100 Subject: [PATCH 23/80] Rename changelog file --- changelog.d/{5656.bugfix => 5658.bugfix} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename changelog.d/{5656.bugfix => 5658.bugfix} (100%) diff --git a/changelog.d/5656.bugfix b/changelog.d/5658.bugfix similarity index 100% rename from changelog.d/5656.bugfix rename to changelog.d/5658.bugfix From b2a2e96ea6d6a2e4f47a9ad28919371227a5dd49 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 10 Jul 2019 15:56:21 +0100 Subject: [PATCH 24/80] Typo 
--- synapse/handlers/identity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index ee6c2c4f8b..546d6169e9 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -118,7 +118,7 @@ class IdentityHandler(BaseHandler): raise SynapseError(400, "No client_secret in creds") try: - data = yield self.http_client.post_post_get_json( + data = yield self.http_client.post_json_get_json( "https://%s%s" % (id_server, "/_matrix/identity/api/v1/3pid/bind"), {"sid": creds["sid"], "client_secret": client_secret, "mxid": mxid}, ) From 953dbb79808c018fe34999a662f4c7cef8ea3721 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 10 Jul 2019 16:26:49 +0100 Subject: [PATCH 25/80] Remove access-token support from RegistrationStore.register (#5642) The 'token' param is no longer used anywhere except the tests, so let's kill that off too. --- changelog.d/5642.misc | 1 + synapse/handlers/register.py | 2 +- synapse/storage/registration.py | 24 +++-------------- tests/api/test_auth.py | 2 +- tests/handlers/test_register.py | 6 +---- tests/handlers/test_user_directory.py | 16 ++++------- tests/storage/test_client_ips.py | 4 +-- tests/storage/test_monthly_active_users.py | 23 +++++++--------- tests/storage/test_registration.py | 31 +++++----------------- 9 files changed, 30 insertions(+), 79 deletions(-) create mode 100644 changelog.d/5642.misc diff --git a/changelog.d/5642.misc b/changelog.d/5642.misc new file mode 100644 index 0000000000..e7f8e214a4 --- /dev/null +++ b/changelog.d/5642.misc @@ -0,0 +1 @@ +Remove access-token support from `RegistrationStore.register`, and rename it. 
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index a3e553d5f5..420c5cb5bc 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -584,7 +584,7 @@ class RegistrationHandler(BaseHandler): address=address, ) else: - return self.store.register( + return self.store.register_user( user_id=user_id, password_hash=password_hash, was_guest=was_guest, diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index aea5b3276b..8e217c9408 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -698,10 +698,9 @@ class RegistrationStore( desc="add_access_token_to_user", ) - def register( + def register_user( self, user_id, - token=None, password_hash=None, was_guest=False, make_guest=False, @@ -714,9 +713,6 @@ class RegistrationStore( Args: user_id (str): The desired user ID to register. - token (str): The desired access token to use for this user. If this - is not None, the given access token is associated with the user - id. password_hash (str): Optional. The password hash for this user. was_guest (bool): Optional. Whether this is a guest account being upgraded to a non-guest account. @@ -733,10 +729,9 @@ class RegistrationStore( StoreError if the user_id could not be registered. 
""" return self.runInteraction( - "register", - self._register, + "register_user", + self._register_user, user_id, - token, password_hash, was_guest, make_guest, @@ -746,11 +741,10 @@ class RegistrationStore( user_type, ) - def _register( + def _register_user( self, txn, user_id, - token, password_hash, was_guest, make_guest, @@ -763,8 +757,6 @@ class RegistrationStore( now = int(self.clock.time()) - next_id = self._access_tokens_id_gen.get_next() - try: if was_guest: # Ensure that the guest user actually exists @@ -812,14 +804,6 @@ class RegistrationStore( if self._account_validity.enabled: self.set_expiration_date_for_user_txn(txn, user_id) - if token: - # it's possible for this to get a conflict, but only for a single user - # since tokens are namespaced based on their user ID - txn.execute( - "INSERT INTO access_tokens(id, user_id, token)" " VALUES (?,?,?)", - (next_id, user_id, token), - ) - if create_profile_with_displayname: # set a default displayname serverside to avoid ugly race # between auto-joins and clients trying to set displaynames diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py index d4e75b5b2e..96b26f974b 100644 --- a/tests/api/test_auth.py +++ b/tests/api/test_auth.py @@ -325,7 +325,7 @@ class AuthTestCase(unittest.TestCase): unknown_threepid = {"medium": "email", "address": "unreserved@server.com"} self.hs.config.mau_limits_reserved_threepids = [threepid] - yield self.store.register(user_id="user1", token="123", password_hash=None) + yield self.store.register_user(user_id="user1", password_hash=None) with self.assertRaises(ResourceLimitError): yield self.auth.check_auth_blocking() diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py index 8197f26d4f..1b7e1dacee 100644 --- a/tests/handlers/test_register.py +++ b/tests/handlers/test_register.py @@ -77,11 +77,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase): store = self.hs.get_datastore() frank = UserID.from_string("@frank:test") self.get_success( 
- store.register( - user_id=frank.to_string(), - token="jkv;g498752-43gj['eamb!-5", - password_hash=None, - ) + store.register_user(user_id=frank.to_string(), password_hash=None) ) local_part = frank.localpart user_id = frank.to_string() diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index b135486c48..c5e91a8c41 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -47,11 +47,8 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): def test_handle_local_profile_change_with_support_user(self): support_user_id = "@support:test" self.get_success( - self.store.register( - user_id=support_user_id, - token="123", - password_hash=None, - user_type=UserTypes.SUPPORT, + self.store.register_user( + user_id=support_user_id, password_hash=None, user_type=UserTypes.SUPPORT ) ) @@ -73,11 +70,8 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): def test_handle_user_deactivated_support_user(self): s_user_id = "@support:test" self.get_success( - self.store.register( - user_id=s_user_id, - token="123", - password_hash=None, - user_type=UserTypes.SUPPORT, + self.store.register_user( + user_id=s_user_id, password_hash=None, user_type=UserTypes.SUPPORT ) ) @@ -90,7 +84,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): def test_handle_user_deactivated_regular_user(self): r_user_id = "@regular:test" self.get_success( - self.store.register(user_id=r_user_id, token="123", password_hash=None) + self.store.register_user(user_id=r_user_id, password_hash=None) ) self.store.remove_from_user_dir = Mock() self.get_success(self.handler.handle_user_deactivated(r_user_id)) diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index 59c6f8c227..09305c3bf1 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -185,9 +185,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): self.hs.config.limit_usage_by_mau = True 
self.hs.config.max_mau_value = 50 user_id = "@user:server" - self.get_success( - self.store.register(user_id=user_id, token="123", password_hash=None) - ) + self.get_success(self.store.register_user(user_id=user_id, password_hash=None)) active = self.get_success(self.store.user_last_seen_monthly_active(user_id)) self.assertFalse(active) diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py index 0ce0b991f9..1494650d10 100644 --- a/tests/storage/test_monthly_active_users.py +++ b/tests/storage/test_monthly_active_users.py @@ -53,10 +53,10 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase): # -1 because user3 is a support user and does not count user_num = len(threepids) - 1 - self.store.register(user_id=user1, token="123", password_hash=None) - self.store.register(user_id=user2, token="456", password_hash=None) - self.store.register( - user_id=user3, token="789", password_hash=None, user_type=UserTypes.SUPPORT + self.store.register_user(user_id=user1, password_hash=None) + self.store.register_user(user_id=user2, password_hash=None) + self.store.register_user( + user_id=user3, password_hash=None, user_type=UserTypes.SUPPORT ) self.pump() @@ -161,9 +161,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase): def test_populate_monthly_users_is_guest(self): # Test that guest users are not added to mau list user_id = "@user_id:host" - self.store.register( - user_id=user_id, token="123", password_hash=None, make_guest=True - ) + self.store.register_user(user_id=user_id, password_hash=None, make_guest=True) self.store.upsert_monthly_active_user = Mock() self.store.populate_monthly_active_users(user_id) self.pump() @@ -216,8 +214,8 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase): self.assertEquals(self.get_success(count), 0) # Test reserved registed users - self.store.register(user_id=user1, token="123", password_hash=None) - self.store.register(user_id=user2, token="456", 
password_hash=None) + self.store.register_user(user_id=user1, password_hash=None) + self.store.register_user(user_id=user2, password_hash=None) self.pump() now = int(self.hs.get_clock().time_msec()) @@ -232,11 +230,8 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase): self.pump() self.assertEqual(self.get_success(count), 0) - self.store.register( - user_id=support_user_id, - token="123", - password_hash=None, - user_type=UserTypes.SUPPORT, + self.store.register_user( + user_id=support_user_id, password_hash=None, user_type=UserTypes.SUPPORT ) self.store.upsert_monthly_active_user(support_user_id) diff --git a/tests/storage/test_registration.py b/tests/storage/test_registration.py index 625b651e91..9365c4622d 100644 --- a/tests/storage/test_registration.py +++ b/tests/storage/test_registration.py @@ -37,7 +37,7 @@ class RegistrationStoreTestCase(unittest.TestCase): @defer.inlineCallbacks def test_register(self): - yield self.store.register(self.user_id, self.tokens[0], self.pwhash) + yield self.store.register_user(self.user_id, self.pwhash) self.assertEquals( { @@ -53,15 +53,9 @@ class RegistrationStoreTestCase(unittest.TestCase): (yield self.store.get_user_by_id(self.user_id)), ) - result = yield self.store.get_user_by_access_token(self.tokens[0]) - - self.assertDictContainsSubset({"name": self.user_id}, result) - - self.assertTrue("token_id" in result) - @defer.inlineCallbacks def test_add_tokens(self): - yield self.store.register(self.user_id, self.tokens[0], self.pwhash) + yield self.store.register_user(self.user_id, self.pwhash) yield self.store.add_access_token_to_user( self.user_id, self.tokens[1], self.device_id ) @@ -77,7 +71,8 @@ class RegistrationStoreTestCase(unittest.TestCase): @defer.inlineCallbacks def test_user_delete_access_tokens(self): # add some tokens - yield self.store.register(self.user_id, self.tokens[0], self.pwhash) + yield self.store.register_user(self.user_id, self.pwhash) + yield 
self.store.add_access_token_to_user(self.user_id, self.tokens[0]) yield self.store.add_access_token_to_user( self.user_id, self.tokens[1], self.device_id ) @@ -108,24 +103,12 @@ class RegistrationStoreTestCase(unittest.TestCase): res = yield self.store.is_support_user(None) self.assertFalse(res) - yield self.store.register(user_id=TEST_USER, token="123", password_hash=None) + yield self.store.register_user(user_id=TEST_USER, password_hash=None) res = yield self.store.is_support_user(TEST_USER) self.assertFalse(res) - yield self.store.register( - user_id=SUPPORT_USER, - token="456", - password_hash=None, - user_type=UserTypes.SUPPORT, + yield self.store.register_user( + user_id=SUPPORT_USER, password_hash=None, user_type=UserTypes.SUPPORT ) res = yield self.store.is_support_user(SUPPORT_USER) self.assertTrue(res) - - -class TokenGenerator: - def __init__(self): - self._last_issued_token = 0 - - def generate(self, user_id): - self._last_issued_token += 1 - return "%s-%d" % (user_id, self._last_issued_token) From 1890cfcf82aa3e69530e97bf9ce783e390f22fbe Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 10 Jul 2019 19:10:07 +0100 Subject: [PATCH 26/80] Inline issue_access_token (#5659) this is only used in one place, so it's clearer if we inline it and reduce the API surface. Also, fixes a buglet where we would create an access token even if we were about to block the user (we would never return the AT, so the user could never use it, but it was still created and added to the db.) --- changelog.d/5659.misc | 1 + synapse/handlers/auth.py | 10 +++------- tests/api/test_auth.py | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) create mode 100644 changelog.d/5659.misc diff --git a/changelog.d/5659.misc b/changelog.d/5659.misc new file mode 100644 index 0000000000..686001295c --- /dev/null +++ b/changelog.d/5659.misc @@ -0,0 +1 @@ +Inline issue_access_token. 
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index ef5585aa99..da312b188e 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -578,9 +578,11 @@ class AuthHandler(BaseHandler): StoreError if there was a problem storing the token. """ logger.info("Logging in user %s on device %s", user_id, device_id) - access_token = yield self.issue_access_token(user_id, device_id) yield self.auth.check_auth_blocking(user_id) + access_token = self.macaroon_gen.generate_access_token(user_id) + yield self.store.add_access_token_to_user(user_id, access_token, device_id) + # the device *should* have been registered before we got here; however, # it's possible we raced against a DELETE operation. The thing we # really don't want is active access_tokens without a record of the @@ -831,12 +833,6 @@ class AuthHandler(BaseHandler): defer.returnValue(None) defer.returnValue(user_id) - @defer.inlineCallbacks - def issue_access_token(self, user_id, device_id=None): - access_token = self.macaroon_gen.generate_access_token(user_id) - yield self.store.add_access_token_to_user(user_id, access_token, device_id) - defer.returnValue(access_token) - @defer.inlineCallbacks def validate_short_term_login_token_and_get_user_id(self, login_token): auth_api = self.hs.get_auth() diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py index 96b26f974b..ddf2b578b3 100644 --- a/tests/api/test_auth.py +++ b/tests/api/test_auth.py @@ -244,7 +244,7 @@ class AuthTestCase(unittest.TestCase): USER_ID = "@percy:matrix.org" self.store.add_access_token_to_user = Mock() - token = yield self.hs.handlers.auth_handler.issue_access_token( + token = yield self.hs.handlers.auth_handler.get_access_token_for_user_id( USER_ID, "DEVICE" ) self.store.add_access_token_to_user.assert_called_with(USER_ID, token, "DEVICE") From 38a6d3eea7e3ce1a2e37f9f88fd6742fcadc90d0 Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Thu, 11 Jul 2019 10:36:03 +0100 Subject: [PATCH 27/80] Add basic 
opentracing support (#5544) * Configure and initialise tracer Includes config options for the tracer and sets up JaegerClient. * Scope manager using LogContexts We piggy-back our tracer scopes by using log context. The current log context gives us the current scope. If new scope is created we create a stack of scopes in the context. * jaeger is a dependency now * Carrier inject and extraction for Twisted Headers * Trace federation requests on the way in and out. The span is created in _started_processing and closed in _finished_processing because we need a meaningful log context. * Create logcontext for new scope. Instead of having a stack of scopes in a logcontext we create a new context for a new scope if the current logcontext already has a scope. * Remove scope from logcontext if logcontext is top level * Disable tracer if not configured * typo * Remove dependence on jaeger internals * bools * Set service name * Explicitly state that the tracer is disabled * Black is the new black * Newsfile * Code style * Use the new config setup. * Generate config. * Copyright * Rename config to opentracing * Remove user whitelisting * Empty whitelist by default * Use ConfigError instead of RuntimeError * Use isinstance * Use tag constants for opentracing. * Remove debug comment and no need to explicitly record error * Two errors a "s(c)entry" * Docstrings! * Remove debugging brainslip * Homeserver Whitelisting * Better opentracing config comment * linting * Include worker name in service_name * Make opentracing an optional dependency * Neater config retrieval * Clean up dummy tags * Instantiate tracing as object instead of global class * Include opentracing as a homeserver member. * Thread opentracing to the request level * Reference opentracing through hs * Instantiate dummy opentracing for tests.
* About to revert, just keeping the unfinished changes just in case * Revert back to global state, commit number: 9ce4a3d9067bf9889b86c360c05ac88618b85c4f * Use class level methods in tracerutils * Start and stop requests spans in a place where we have access to the authenticated entity * Seen it, isort it * Make sure to close the active span. * I'm getting black and blue from this. * Logger formatting Co-Authored-By: Erik Johnston * Outdated comment * Import opentracing at the top * Return a contextmanager * Start tracing client requests from the servlet * Return noop context manager if not tracing * Explicitely say that these are federation requests * Include servlet name in client requests * Use context manager * Move opentracing to logging/ * Seen it, isort it again! * Ignore twisted return exceptions on context exit * Escape the scope * Scopes should be entered to make them useful. * Nicer decorator names * Just one init, init? * Don't need to close something that isn't open * Docs make you smarter --- changelog.d/5544.misc | 1 + docs/sample_config.yaml | 17 ++ synapse/app/_base.py | 3 + synapse/config/homeserver.py | 2 + synapse/config/tracer.py | 50 ++++ synapse/federation/transport/server.py | 26 +- synapse/http/matrixfederationclient.py | 28 +- synapse/http/servlet.py | 7 +- synapse/logging/context.py | 8 +- synapse/logging/opentracing.py | 362 +++++++++++++++++++++++++ synapse/logging/scopecontextmanager.py | 140 ++++++++++ synapse/python_dependencies.py | 1 + 12 files changed, 633 insertions(+), 12 deletions(-) create mode 100644 changelog.d/5544.misc create mode 100644 synapse/config/tracer.py create mode 100644 synapse/logging/opentracing.py create mode 100644 synapse/logging/scopecontextmanager.py diff --git a/changelog.d/5544.misc b/changelog.d/5544.misc new file mode 100644 index 0000000000..81d6f74c31 --- /dev/null +++ b/changelog.d/5544.misc @@ -0,0 +1 @@ +Added opentracing and configuration options. 
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 7fe7c94ac4..0462f0a17a 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1395,3 +1395,20 @@ password_config: # module: "my_custom_project.SuperRulesSet" # config: # example_option: 'things' + + +## Opentracing ## +# These settings enable opentracing which implements distributed tracing +# This allows you to observe the causal chain of events across servers +# including requests, key lookups etc. across any server running +# synapse or any other other services which supports opentracing. +# (specifically those implemented with jaeger) + +#opentracing: +# # Enable / disable tracer +# tracer_enabled: false +# # The list of homeservers we wish to expose our current traces to. +# # The list is a list of regexes which are matched against the +# # servername of the homeserver +# homeserver_whitelist: +# - ".*" diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 1ebb7ae539..bd285122ea 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -243,6 +243,9 @@ def start(hs, listeners=None): # Load the certificate from disk. refresh_certificate(hs) + # Start the tracer + synapse.logging.opentracing.init_tracer(hs.config) + # It is now safe to start your Synapse. 
hs.start_listening(listeners) hs.get_datastore().start_profiling() diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index acadef4fd3..72acad4f18 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -40,6 +40,7 @@ from .spam_checker import SpamCheckerConfig from .stats import StatsConfig from .third_party_event_rules import ThirdPartyRulesConfig from .tls import TlsConfig +from .tracer import TracerConfig from .user_directory import UserDirectoryConfig from .voip import VoipConfig from .workers import WorkerConfig @@ -75,5 +76,6 @@ class HomeServerConfig( ServerNoticesConfig, RoomDirectoryConfig, ThirdPartyRulesConfig, + TracerConfig, ): pass diff --git a/synapse/config/tracer.py b/synapse/config/tracer.py new file mode 100644 index 0000000000..63a637984a --- /dev/null +++ b/synapse/config/tracer.py @@ -0,0 +1,50 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C.d +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from ._base import Config, ConfigError + + +class TracerConfig(Config): + def read_config(self, config, **kwargs): + self.tracer_config = config.get("opentracing") + + self.tracer_config = config.get("opentracing", {"tracer_enabled": False}) + + if self.tracer_config.get("tracer_enabled", False): + # The tracer is enabled so sanitize the config + # If no whitelists are given + self.tracer_config.setdefault("homeserver_whitelist", []) + + if not isinstance(self.tracer_config.get("homeserver_whitelist"), list): + raise ConfigError("Tracer homesererver_whitelist config is malformed") + + def generate_config_section(cls, **kwargs): + return """\ + ## Opentracing ## + # These settings enable opentracing which implements distributed tracing + # This allows you to observe the causal chain of events across servers + # including requests, key lookups etc. across any server running + # synapse or any other other services which supports opentracing. + # (specifically those implemented with jaeger) + + #opentracing: + # # Enable / disable tracer + # tracer_enabled: false + # # The list of homeservers we wish to expose our current traces to. 
+ # # The list is a list of regexes which are matched against the + # # servername of the homeserver + # homeserver_whitelist: + # - ".*" + """ diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 2efdcff7ef..c45d458d94 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -21,6 +21,7 @@ import re from twisted.internet import defer import synapse +import synapse.logging.opentracing as opentracing from synapse.api.errors import Codes, FederationDeniedError, SynapseError from synapse.api.room_versions import RoomVersions from synapse.api.urls import ( @@ -288,14 +289,29 @@ class BaseFederationServlet(object): logger.warn("authenticate_request failed: %s", e) raise - if origin: - with ratelimiter.ratelimit(origin) as d: - yield d + # Start an opentracing span + with opentracing.start_active_span_from_context( + request.requestHeaders, + "incoming-federation-request", + tags={ + "request_id": request.get_request_id(), + opentracing.tags.SPAN_KIND: opentracing.tags.SPAN_KIND_RPC_SERVER, + opentracing.tags.HTTP_METHOD: request.get_method(), + opentracing.tags.HTTP_URL: request.get_redacted_uri(), + opentracing.tags.PEER_HOST_IPV6: request.getClientIP(), + "authenticated_entity": origin, + }, + ): + if origin: + with ratelimiter.ratelimit(origin) as d: + yield d + response = yield func( + origin, content, request.args, *args, **kwargs + ) + else: response = yield func( origin, content, request.args, *args, **kwargs ) - else: - response = yield func(origin, content, request.args, *args, **kwargs) defer.returnValue(response) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index dee3710f68..e60334547e 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -36,6 +36,7 @@ from twisted.internet.task import _EPSILON, Cooperator from twisted.web._newclient import ResponseDone from 
twisted.web.http_headers import Headers +import synapse.logging.opentracing as opentracing import synapse.metrics import synapse.util.retryutils from synapse.api.errors import ( @@ -339,9 +340,25 @@ class MatrixFederationHttpClient(object): else: query_bytes = b"" - headers_dict = {b"User-Agent": [self.version_string_bytes]} + # Retreive current span + scope = opentracing.start_active_span( + "outgoing-federation-request", + tags={ + opentracing.tags.SPAN_KIND: opentracing.tags.SPAN_KIND_RPC_CLIENT, + opentracing.tags.PEER_ADDRESS: request.destination, + opentracing.tags.HTTP_METHOD: request.method, + opentracing.tags.HTTP_URL: request.path, + }, + finish_on_close=True, + ) - with limiter: + # Inject the span into the headers + headers_dict = {} + opentracing.inject_active_span_byte_dict(headers_dict, request.destination) + + headers_dict[b"User-Agent"] = [self.version_string_bytes] + + with limiter, scope: # XXX: Would be much nicer to retry only at the transaction-layer # (once we have reliable transactions in place) if long_retries: @@ -419,6 +436,10 @@ class MatrixFederationHttpClient(object): response.phrase.decode("ascii", errors="replace"), ) + opentracing.set_tag( + opentracing.tags.HTTP_STATUS_CODE, response.code + ) + if 200 <= response.code < 300: pass else: @@ -499,8 +520,7 @@ class MatrixFederationHttpClient(object): _flatten_response_never_received(e), ) raise - - defer.returnValue(response) + defer.returnValue(response) def build_auth_headers( self, destination, method, url_bytes, content=None, destination_is=None diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index cd8415acd5..889038ff25 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -20,6 +20,7 @@ import logging from canonicaljson import json from synapse.api.errors import Codes, SynapseError +from synapse.logging.opentracing import trace_servlet logger = logging.getLogger(__name__) @@ -290,7 +291,11 @@ class RestServlet(object): for method in ("GET", "PUT", 
"POST", "OPTIONS", "DELETE"): if hasattr(self, "on_%s" % (method,)): method_handler = getattr(self, "on_%s" % (method,)) - http_server.register_paths(method, patterns, method_handler) + http_server.register_paths( + method, + patterns, + trace_servlet(self.__class__.__name__, method_handler), + ) else: raise NotImplementedError("RestServlet must register something.") diff --git a/synapse/logging/context.py b/synapse/logging/context.py index 30dfa1d6b2..b456c31f70 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -186,6 +186,7 @@ class LoggingContext(object): "alive", "request", "tag", + "scope", ] thread_local = threading.local() @@ -238,6 +239,7 @@ class LoggingContext(object): self.request = None self.tag = "" self.alive = True + self.scope = None self.parent_context = parent_context @@ -322,10 +324,12 @@ class LoggingContext(object): another LoggingContext """ - # 'request' is the only field we currently use in the logger, so that's - # all we need to copy + # we track the current request record.request = self.request + # we also track the current scope: + record.scope = self.scope + def start(self): if get_thread_id() != self.main_thread: logger.warning("Started logcontext %s on different thread", self) diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py new file mode 100644 index 0000000000..f0ceea2a64 --- /dev/null +++ b/synapse/logging/opentracing.py @@ -0,0 +1,362 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C.d +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License.import opentracing + + +# NOTE +# This is a small wrapper around opentracing because opentracing is not currently +# packaged downstream (specifically debian). Since opentracing instrumentation is +# fairly invasive it was awkward to make it optional. As a result we opted to encapsulate +# all opentracing state in these methods which effectively noop if opentracing is +# not present. We should strongly consider encouraging the downstream distributers +# to package opentracing and making opentracing a full dependency. In order to facilitate +# this move the methods have work very similarly to opentracing's and it should only +# be a matter of few regexes to move over to opentracing's access patterns proper. + +try: + import opentracing +except ImportError: + opentracing = None +try: + from jaeger_client import Config as JaegerConfig + from synapse.logging.scopecontextmanager import LogContextScopeManager +except ImportError: + JaegerConfig = None + LogContextScopeManager = None + +import contextlib +import logging +import re +from functools import wraps + +from twisted.internet import defer + +logger = logging.getLogger(__name__) + + +class _DumTagNames(object): + """wrapper of opentracings tags. We need to have them if we + want to reference them without opentracing around. Clearly they + should never actually show up in a trace. 
`set_tags` overwrites + these with the correct ones.""" + + INVALID_TAG = "invalid-tag" + COMPONENT = INVALID_TAG + DATABASE_INSTANCE = INVALID_TAG + DATABASE_STATEMENT = INVALID_TAG + DATABASE_TYPE = INVALID_TAG + DATABASE_USER = INVALID_TAG + ERROR = INVALID_TAG + HTTP_METHOD = INVALID_TAG + HTTP_STATUS_CODE = INVALID_TAG + HTTP_URL = INVALID_TAG + MESSAGE_BUS_DESTINATION = INVALID_TAG + PEER_ADDRESS = INVALID_TAG + PEER_HOSTNAME = INVALID_TAG + PEER_HOST_IPV4 = INVALID_TAG + PEER_HOST_IPV6 = INVALID_TAG + PEER_PORT = INVALID_TAG + PEER_SERVICE = INVALID_TAG + SAMPLING_PRIORITY = INVALID_TAG + SERVICE = INVALID_TAG + SPAN_KIND = INVALID_TAG + SPAN_KIND_CONSUMER = INVALID_TAG + SPAN_KIND_PRODUCER = INVALID_TAG + SPAN_KIND_RPC_CLIENT = INVALID_TAG + SPAN_KIND_RPC_SERVER = INVALID_TAG + + +def only_if_tracing(func): + """Executes the function only if we're tracing. Otherwise return. + Assumes the function wrapped may return None""" + + @wraps(func) + def _only_if_tracing_inner(*args, **kwargs): + if opentracing: + return func(*args, **kwargs) + else: + return + + return _only_if_tracing_inner + + +# Block everything by default +_homeserver_whitelist = None + +tags = _DumTagNames + + +def init_tracer(config): + """Set the whitelists and initialise the JaegerClient tracer + + Args: + config (Config) + The config used by the homeserver. Here it's used to set the service + name to the homeserver's. + """ + global opentracing + if not config.tracer_config.get("tracer_enabled", False): + # We don't have a tracer + opentracing = None + return + + if not opentracing: + logger.error( + "The server has been configure to use opentracing but opentracing is not installed." + ) + raise ModuleNotFoundError("opentracing") + + if not JaegerConfig: + logger.error( + "The server has been configure to use opentracing but opentracing is not installed." 
+ ) + + # Include the worker name + name = config.worker_name if config.worker_name else "master" + + set_homeserver_whitelist(config.tracer_config["homeserver_whitelist"]) + jaeger_config = JaegerConfig( + config={"sampler": {"type": "const", "param": 1}, "logging": True}, + service_name="{} {}".format(config.server_name, name), + scope_manager=LogContextScopeManager(config), + ) + jaeger_config.initialize_tracer() + + # Set up tags to be opentracing's tags + global tags + tags = opentracing.tags + + +@contextlib.contextmanager +def _noop_context_manager(*args, **kwargs): + """Does absolutely nothing really well. Can be entered and exited arbitrarily. + Good substitute for an opentracing scope.""" + yield + + +# Could use kwargs but I want these to be explicit +def start_active_span( + operation_name, + child_of=None, + references=None, + tags=None, + start_time=None, + ignore_active_span=False, + finish_on_close=True, +): + """Starts an active opentracing span. Note, the scope doesn't become active + until it has been entered, however, the span starts from the time this + message is called. + Args: + See opentracing.tracer + Returns: + scope (Scope) or noop_context_manager + """ + if opentracing is None: + return _noop_context_manager() + else: + # We need to enter the scope here for the logcontext to become active + return opentracing.tracer.start_active_span( + operation_name, + child_of=child_of, + references=references, + tags=tags, + start_time=start_time, + ignore_active_span=ignore_active_span, + finish_on_close=finish_on_close, + ) + + +@only_if_tracing +def close_active_span(): + """Closes the active span. 
This will close it's logcontext if the context + was made for the span""" + opentracing.tracer.scope_manager.active.__exit__(None, None, None) + + +@only_if_tracing +def set_tag(key, value): + """Set's a tag on the active span""" + opentracing.tracer.active_span.set_tag(key, value) + + +@only_if_tracing +def log_kv(key_values, timestamp=None): + """Log to the active span""" + opentracing.tracer.active_span.log_kv(key_values, timestamp) + + +# Note: we don't have a get baggage items because we're trying to hide all +# scope and span state from synapse. I think this method may also be useless +# as a result +@only_if_tracing +def set_baggage_item(key, value): + """Attach baggage to the active span""" + opentracing.tracer.active_span.set_baggage_item(key, value) + + +@only_if_tracing +def set_operation_name(operation_name): + """Sets the operation name of the active span""" + opentracing.tracer.active_span.set_operation_name(operation_name) + + +@only_if_tracing +def set_homeserver_whitelist(homeserver_whitelist): + """Sets the whitelist + + Args: + homeserver_whitelist (iterable of strings): regex of whitelisted homeservers + """ + global _homeserver_whitelist + if homeserver_whitelist: + # Makes a single regex which accepts all passed in regexes in the list + _homeserver_whitelist = re.compile( + "({})".format(")|(".join(homeserver_whitelist)) + ) + + +@only_if_tracing +def whitelisted_homeserver(destination): + """Checks if a destination matches the whitelist + Args: + destination (String)""" + global _homeserver_whitelist + if _homeserver_whitelist: + return _homeserver_whitelist.match(destination) + return False + + +def start_active_span_from_context( + headers, + operation_name, + references=None, + tags=None, + start_time=None, + ignore_active_span=False, + finish_on_close=True, +): + """ + Extracts a span context from Twisted Headers. 
+ args: + headers (twisted.web.http_headers.Headers) + returns: + span_context (opentracing.span.SpanContext) + """ + # Twisted encodes the values as lists whereas opentracing doesn't. + # So, we take the first item in the list. + # Also, twisted uses byte arrays while opentracing expects strings. + if opentracing is None: + return _noop_context_manager() + + header_dict = {k.decode(): v[0].decode() for k, v in headers.getAllRawHeaders()} + context = opentracing.tracer.extract(opentracing.Format.HTTP_HEADERS, header_dict) + + return opentracing.tracer.start_active_span( + operation_name, + child_of=context, + references=references, + tags=tags, + start_time=start_time, + ignore_active_span=ignore_active_span, + finish_on_close=finish_on_close, + ) + + +@only_if_tracing +def inject_active_span_twisted_headers(headers, destination): + """ + Injects a span context into twisted headers inplace + + Args: + headers (twisted.web.http_headers.Headers) + span (opentracing.Span) + + Returns: + Inplace modification of headers + + Note: + The headers set by the tracer are custom to the tracer implementation which + should be unique enough that they don't interfere with any headers set by + synapse or twisted. 
If we're still using jaeger these headers would be those + here: + https://github.com/jaegertracing/jaeger-client-python/blob/master/jaeger_client/constants.py + """ + + if not whitelisted_homeserver(destination): + return + + span = opentracing.tracer.active_span + carrier = {} + opentracing.tracer.inject(span, opentracing.Format.HTTP_HEADERS, carrier) + + for key, value in carrier.items(): + headers.addRawHeaders(key, value) + + +@only_if_tracing +def inject_active_span_byte_dict(headers, destination): + """ + Injects a span context into a dict where the headers are encoded as byte + strings + + Args: + headers (dict) + destination (str) + + Returns: + Inplace modification of headers + + Note: + The headers set by the tracer are custom to the tracer implementation which + should be unique enough that they don't interfere with any headers set by + synapse or twisted. If we're still using jaeger these headers would be those + here: + https://github.com/jaegertracing/jaeger-client-python/blob/master/jaeger_client/constants.py + """ + if not whitelisted_homeserver(destination): + return + + span = opentracing.tracer.active_span + + carrier = {} + opentracing.tracer.inject(span, opentracing.Format.HTTP_HEADERS, carrier) + + for key, value in carrier.items(): + headers[key.encode()] = [value.encode()] + + +def trace_servlet(servlet_name, func): + """Decorator which traces a servlet. 
It starts a span with some servlet specific + tags such as the servlet_name and request information""" + + @wraps(func) + @defer.inlineCallbacks + def _trace_servlet_inner(request, *args, **kwargs): + with start_active_span_from_context( + request.requestHeaders, + "incoming-client-request", + tags={ + "request_id": request.get_request_id(), + tags.SPAN_KIND: tags.SPAN_KIND_RPC_SERVER, + tags.HTTP_METHOD: request.get_method(), + tags.HTTP_URL: request.get_redacted_uri(), + tags.PEER_HOST_IPV6: request.getClientIP(), + "servlet_name": servlet_name, + }, + ): + result = yield defer.maybeDeferred(func, request, *args, **kwargs) + defer.returnValue(result) + + return _trace_servlet_inner diff --git a/synapse/logging/scopecontextmanager.py b/synapse/logging/scopecontextmanager.py new file mode 100644 index 0000000000..91e14462f3 --- /dev/null +++ b/synapse/logging/scopecontextmanager.py @@ -0,0 +1,140 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from opentracing import Scope, ScopeManager + +import twisted + +from synapse.logging.context import LoggingContext, nested_logging_context + +logger = logging.getLogger(__name__) + + +class LogContextScopeManager(ScopeManager): + """ + The LogContextScopeManager tracks the active scope in opentracing + by using the log contexts which are native to synapse. 
This is so + that the basic opentracing api can be used across twisted deferreds. + (I would love to break logcontexts and this into an OS package, but + let's wait for twisted's contexts to be released.) + """ + + def __init__(self, config): + # Set the whitelists + logger.info(config.tracer_config) + self._homeserver_whitelist = config.tracer_config["homeserver_whitelist"] + + @property + def active(self): + """ + Returns the currently active Scope which can be used to access the + currently active Scope.span. + If there is a non-null Scope, its wrapped Span + becomes an implicit parent of any newly-created Span at + Tracer.start_active_span() time. + + Return: + (Scope) : the Scope that is active, or None if not + available. + """ + ctx = LoggingContext.current_context() + if ctx is LoggingContext.sentinel: + return None + else: + return ctx.scope + + def activate(self, span, finish_on_close): + """ + Makes a Span active. + Args: + span (Span): the span that should become active. + finish_on_close (Boolean): whether Span should be automatically + finished when Scope.close() is called. + + Returns: + Scope to control the end of the active period for + *span*. It is a programming error to neglect to call + Scope.close() on the returned instance. + """ + + enter_logcontext = False + ctx = LoggingContext.current_context() + + if ctx is LoggingContext.sentinel: + # We don't want this scope to affect the sentinel context. + logger.error("Tried to activate scope outside of loggingcontext") + return Scope(None, span) + elif ctx.scope is not None: + # We want the logging scope to look exactly the same so we give it + # a blank suffix + ctx = nested_logging_context("") + enter_logcontext = True + + scope = _LogContextScope(self, span, ctx, enter_logcontext, finish_on_close) + ctx.scope = scope + return scope + + +class _LogContextScope(Scope): + """ + A custom opentracing scope. 
The only significant difference is that it will + close the log context it's related to if the logcontext was created specifically + for this scope. + """ + + def __init__(self, manager, span, logcontext, enter_logcontext, finish_on_close): + """ + Args: + manager (LogContextScopeManager): + the manager that is responsible for this scope. + span (Span): + the opentracing span which this scope represents the local + lifetime for. + logcontext (LogContext): + the logcontext to which this scope is attached. + enter_logcontext (Boolean): + if True the logcontext will be entered and exited when the scope + is entered and exited respectively + finish_on_close (Boolean): + if True finish the span when the scope is closed + """ + super(_LogContextScope, self).__init__(manager, span) + self.logcontext = logcontext + self._finish_on_close = finish_on_close + self._enter_logcontext = enter_logcontext + + def __enter__(self): + if self._enter_logcontext: + self.logcontext.__enter__() + + def __exit__(self, type, value, traceback): + if type == twisted.internet.defer._DefGen_Return: + super(_LogContextScope, self).__exit__(None, None, None) + else: + super(_LogContextScope, self).__exit__(type, value, traceback) + if self._enter_logcontext: + self.logcontext.__exit__(type, value, traceback) + else: # the logcontext existed before the creation of the scope + self.logcontext.scope = None + + def close(self): + if self.manager.active is not self: + logger.error("Tried to close a none active scope!") + return + + if self._finish_on_close: + self.span.finish() diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 6324c00ef1..e7618057be 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -95,6 +95,7 @@ CONDITIONAL_REQUIREMENTS = { "url_preview": ["lxml>=3.5.0"], "test": ["mock>=2.0", "parameterized"], "sentry": ["sentry-sdk>=0.7.2"], + "opentracing": ["jaeger-client>=4.0.0", "opentracing>=2.2.0"], "jwt": ["pyjwt>=1.6.4"], } 
From 0a4001eba1eb22fc7c39f257c8d5a326b1a489ad Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 11 Jul 2019 11:06:23 +0100 Subject: [PATCH 28/80] Clean up exception handling for access_tokens (#5656) First of all, let's get rid of `TOKEN_NOT_FOUND_HTTP_STATUS`. It was a hack we did at one point when it was possible to return either a 403 or a 401 if the creds were missing. We always return a 401 in these cases now (thankfully), so it's not needed. Let's also stop abusing `AuthError` for these cases. Honestly they have nothing that relates them to the other places that `AuthError` is used, other than the fact that they are loosely under the 'Auth' banner. It makes no sense for them to share exception classes. Instead, let's add a couple of new exception classes: `InvalidClientTokenError` and `MissingClientTokenError`, for the `M_UNKNOWN_TOKEN` and `M_MISSING_TOKEN` cases respectively - and an `InvalidClientCredentialsError` base class for the two of them. --- changelog.d/5656.misc | 1 + synapse/api/auth.py | 127 +++++++++------------------- synapse/api/errors.py | 33 +++++++- synapse/rest/client/v1/directory.py | 10 ++- synapse/rest/client/v1/room.py | 9 +- tests/api/test_auth.py | 31 +++++-- 6 files changed, 111 insertions(+), 100 deletions(-) create mode 100644 changelog.d/5656.misc diff --git a/changelog.d/5656.misc b/changelog.d/5656.misc new file mode 100644 index 0000000000..a8de20a7d0 --- /dev/null +++ b/changelog.d/5656.misc @@ -0,0 +1 @@ +Clean up exception handling around client access tokens. 
diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 86f145649c..afc6400948 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -25,7 +25,13 @@ from twisted.internet import defer import synapse.types from synapse import event_auth from synapse.api.constants import EventTypes, JoinRules, Membership -from synapse.api.errors import AuthError, Codes, ResourceLimitError +from synapse.api.errors import ( + AuthError, + Codes, + InvalidClientTokenError, + MissingClientTokenError, + ResourceLimitError, +) from synapse.config.server import is_threepid_reserved from synapse.types import UserID from synapse.util.caches import CACHE_SIZE_FACTOR, register_cache @@ -63,7 +69,6 @@ class Auth(object): self.clock = hs.get_clock() self.store = hs.get_datastore() self.state = hs.get_state_handler() - self.TOKEN_NOT_FOUND_HTTP_STATUS = 401 self.token_cache = LruCache(CACHE_SIZE_FACTOR * 10000) register_cache("cache", "token_cache", self.token_cache) @@ -189,18 +194,17 @@ class Auth(object): Returns: defer.Deferred: resolves to a ``synapse.types.Requester`` object Raises: - AuthError if no user by that token exists or the token is invalid. + InvalidClientCredentialsError if no user by that token exists or the token + is invalid. + AuthError if access is denied for the user in the access token """ - # Can optionally look elsewhere in the request (e.g. 
headers) try: ip_addr = self.hs.get_ip_from_request(request) user_agent = request.requestHeaders.getRawHeaders( b"User-Agent", default=[b""] )[0].decode("ascii", "surrogateescape") - access_token = self.get_access_token_from_request( - request, self.TOKEN_NOT_FOUND_HTTP_STATUS - ) + access_token = self.get_access_token_from_request(request) user_id, app_service = yield self._get_appservice_user_id(request) if user_id: @@ -264,18 +268,12 @@ class Auth(object): ) ) except KeyError: - raise AuthError( - self.TOKEN_NOT_FOUND_HTTP_STATUS, - "Missing access token.", - errcode=Codes.MISSING_TOKEN, - ) + raise MissingClientTokenError() @defer.inlineCallbacks def _get_appservice_user_id(self, request): app_service = self.store.get_app_service_by_token( - self.get_access_token_from_request( - request, self.TOKEN_NOT_FOUND_HTTP_STATUS - ) + self.get_access_token_from_request(request) ) if app_service is None: defer.returnValue((None, None)) @@ -313,7 +311,8 @@ class Auth(object): `token_id` (int|None): access token id. May be None if guest `device_id` (str|None): device corresponding to access token Raises: - AuthError if no user by that token exists or the token is invalid. + InvalidClientCredentialsError if no user by that token exists or the token + is invalid. """ if rights == "access": @@ -331,11 +330,7 @@ class Auth(object): if not guest: # non-guest access tokens must be in the database logger.warning("Unrecognised access token - not in store.") - raise AuthError( - self.TOKEN_NOT_FOUND_HTTP_STATUS, - "Unrecognised access token.", - errcode=Codes.UNKNOWN_TOKEN, - ) + raise InvalidClientTokenError() # Guest access tokens are not stored in the database (there can # only be one access token per guest, anyway). @@ -350,16 +345,10 @@ class Auth(object): # guest tokens. 
stored_user = yield self.store.get_user_by_id(user_id) if not stored_user: - raise AuthError( - self.TOKEN_NOT_FOUND_HTTP_STATUS, - "Unknown user_id %s" % user_id, - errcode=Codes.UNKNOWN_TOKEN, - ) + raise InvalidClientTokenError("Unknown user_id %s" % user_id) if not stored_user["is_guest"]: - raise AuthError( - self.TOKEN_NOT_FOUND_HTTP_STATUS, - "Guest access token used for regular user", - errcode=Codes.UNKNOWN_TOKEN, + raise InvalidClientTokenError( + "Guest access token used for regular user" ) ret = { "user": user, @@ -386,11 +375,7 @@ class Auth(object): ValueError, ) as e: logger.warning("Invalid macaroon in auth: %s %s", type(e), e) - raise AuthError( - self.TOKEN_NOT_FOUND_HTTP_STATUS, - "Invalid macaroon passed.", - errcode=Codes.UNKNOWN_TOKEN, - ) + raise InvalidClientTokenError("Invalid macaroon passed.") def _parse_and_validate_macaroon(self, token, rights="access"): """Takes a macaroon and tries to parse and validate it. This is cached @@ -430,11 +415,7 @@ class Auth(object): macaroon, rights, self.hs.config.expire_access_token, user_id=user_id ) except (pymacaroons.exceptions.MacaroonException, TypeError, ValueError): - raise AuthError( - self.TOKEN_NOT_FOUND_HTTP_STATUS, - "Invalid macaroon passed.", - errcode=Codes.UNKNOWN_TOKEN, - ) + raise InvalidClientTokenError("Invalid macaroon passed.") if not has_expiry and rights == "access": self.token_cache[token] = (user_id, guest) @@ -453,17 +434,14 @@ class Auth(object): (str) user id Raises: - AuthError if there is no user_id caveat in the macaroon + InvalidClientCredentialsError if there is no user_id caveat in the + macaroon """ user_prefix = "user_id = " for caveat in macaroon.caveats: if caveat.caveat_id.startswith(user_prefix): return caveat.caveat_id[len(user_prefix) :] - raise AuthError( - self.TOKEN_NOT_FOUND_HTTP_STATUS, - "No user caveat in macaroon", - errcode=Codes.UNKNOWN_TOKEN, - ) + raise InvalidClientTokenError("No user caveat in macaroon") def validate_macaroon(self, macaroon, 
type_string, verify_expiry, user_id): """ @@ -531,22 +509,13 @@ class Auth(object): defer.returnValue(user_info) def get_appservice_by_req(self, request): - try: - token = self.get_access_token_from_request( - request, self.TOKEN_NOT_FOUND_HTTP_STATUS - ) - service = self.store.get_app_service_by_token(token) - if not service: - logger.warn("Unrecognised appservice access token.") - raise AuthError( - self.TOKEN_NOT_FOUND_HTTP_STATUS, - "Unrecognised access token.", - errcode=Codes.UNKNOWN_TOKEN, - ) - request.authenticated_entity = service.sender - return defer.succeed(service) - except KeyError: - raise AuthError(self.TOKEN_NOT_FOUND_HTTP_STATUS, "Missing access token.") + token = self.get_access_token_from_request(request) + service = self.store.get_app_service_by_token(token) + if not service: + logger.warn("Unrecognised appservice access token.") + raise InvalidClientTokenError() + request.authenticated_entity = service.sender + return defer.succeed(service) def is_server_admin(self, user): """ Check if the given user is a local server admin. @@ -692,20 +661,16 @@ class Auth(object): return bool(query_params) or bool(auth_headers) @staticmethod - def get_access_token_from_request(request, token_not_found_http_status=401): + def get_access_token_from_request(request): """Extracts the access_token from the request. Args: request: The http request. - token_not_found_http_status(int): The HTTP status code to set in the - AuthError if the token isn't found. This is used in some of the - legacy APIs to change the status code to 403 from the default of - 401 since some of the old clients depended on auth errors returning - 403. Returns: unicode: The access_token Raises: - AuthError: If there isn't an access_token in the request. 
+ MissingClientTokenError: If there isn't a single access_token in the + request """ auth_headers = request.requestHeaders.getRawHeaders(b"Authorization") @@ -714,34 +679,20 @@ class Auth(object): # Try the get the access_token from a "Authorization: Bearer" # header if query_params is not None: - raise AuthError( - token_not_found_http_status, - "Mixing Authorization headers and access_token query parameters.", - errcode=Codes.MISSING_TOKEN, + raise MissingClientTokenError( + "Mixing Authorization headers and access_token query parameters." ) if len(auth_headers) > 1: - raise AuthError( - token_not_found_http_status, - "Too many Authorization headers.", - errcode=Codes.MISSING_TOKEN, - ) + raise MissingClientTokenError("Too many Authorization headers.") parts = auth_headers[0].split(b" ") if parts[0] == b"Bearer" and len(parts) == 2: return parts[1].decode("ascii") else: - raise AuthError( - token_not_found_http_status, - "Invalid Authorization header.", - errcode=Codes.MISSING_TOKEN, - ) + raise MissingClientTokenError("Invalid Authorization header.") else: # Try to get the access_token from the query params. if not query_params: - raise AuthError( - token_not_found_http_status, - "Missing access token.", - errcode=Codes.MISSING_TOKEN, - ) + raise MissingClientTokenError() return query_params[0].decode("ascii") diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 28b5c2af9b..41fd04cd54 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -210,7 +210,9 @@ class NotFoundError(SynapseError): class AuthError(SynapseError): - """An error raised when there was a problem authorising an event.""" + """An error raised when there was a problem authorising an event, and at various + other poorly-defined times. 
+ """ def __init__(self, *args, **kwargs): if "errcode" not in kwargs: @@ -218,6 +220,35 @@ class AuthError(SynapseError): super(AuthError, self).__init__(*args, **kwargs) +class InvalidClientCredentialsError(SynapseError): + """An error raised when there was a problem with the authorisation credentials + in a client request. + + https://matrix.org/docs/spec/client_server/r0.5.0#using-access-tokens: + + When credentials are required but missing or invalid, the HTTP call will + return with a status of 401 and the error code, M_MISSING_TOKEN or + M_UNKNOWN_TOKEN respectively. + """ + + def __init__(self, msg, errcode): + super().__init__(code=401, msg=msg, errcode=errcode) + + +class MissingClientTokenError(InvalidClientCredentialsError): + """Raised when we couldn't find the access token in a request""" + + def __init__(self, msg="Missing access token"): + super().__init__(msg=msg, errcode="M_MISSING_TOKEN") + + +class InvalidClientTokenError(InvalidClientCredentialsError): + """Raised when we didn't understand the access token in a request""" + + def __init__(self, msg="Unrecognised access token"): + super().__init__(msg=msg, errcode="M_UNKNOWN_TOKEN") + + class ResourceLimitError(SynapseError): """ Any error raised when there is a problem with resource usage. 
diff --git a/synapse/rest/client/v1/directory.py b/synapse/rest/client/v1/directory.py index dd0d38ea5c..57542c2b4b 100644 --- a/synapse/rest/client/v1/directory.py +++ b/synapse/rest/client/v1/directory.py @@ -18,7 +18,13 @@ import logging from twisted.internet import defer -from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError +from synapse.api.errors import ( + AuthError, + Codes, + InvalidClientCredentialsError, + NotFoundError, + SynapseError, +) from synapse.http.servlet import RestServlet, parse_json_object_from_request from synapse.rest.client.v2_alpha._base import client_patterns from synapse.types import RoomAlias @@ -97,7 +103,7 @@ class ClientDirectoryServer(RestServlet): room_alias.to_string(), ) defer.returnValue((200, {})) - except AuthError: + except InvalidClientCredentialsError: # fallback to default user behaviour if they aren't an AS pass diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index cca7e45ddb..7709c2d705 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -24,7 +24,12 @@ from canonicaljson import json from twisted.internet import defer from synapse.api.constants import EventTypes, Membership -from synapse.api.errors import AuthError, Codes, SynapseError +from synapse.api.errors import ( + AuthError, + Codes, + InvalidClientCredentialsError, + SynapseError, +) from synapse.api.filtering import Filter from synapse.events.utils import format_event_for_client_v2 from synapse.http.servlet import ( @@ -307,7 +312,7 @@ class PublicRoomListRestServlet(TransactionRestServlet): try: yield self.auth.get_user_by_req(request, allow_guest=True) - except AuthError as e: + except InvalidClientCredentialsError as e: # Option to allow servers to require auth when accessing # /publicRooms via CS API. This is especially helpful in private # federations. 
diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py index ddf2b578b3..ee92ceeb60 100644 --- a/tests/api/test_auth.py +++ b/tests/api/test_auth.py @@ -21,7 +21,14 @@ from twisted.internet import defer import synapse.handlers.auth from synapse.api.auth import Auth -from synapse.api.errors import AuthError, Codes, ResourceLimitError +from synapse.api.errors import ( + AuthError, + Codes, + InvalidClientCredentialsError, + InvalidClientTokenError, + MissingClientTokenError, + ResourceLimitError, +) from synapse.types import UserID from tests import unittest @@ -70,7 +77,9 @@ class AuthTestCase(unittest.TestCase): request.args[b"access_token"] = [self.test_token] request.requestHeaders.getRawHeaders = mock_getRawHeaders() d = self.auth.get_user_by_req(request) - self.failureResultOf(d, AuthError) + f = self.failureResultOf(d, InvalidClientTokenError).value + self.assertEqual(f.code, 401) + self.assertEqual(f.errcode, "M_UNKNOWN_TOKEN") def test_get_user_by_req_user_missing_token(self): user_info = {"name": self.test_user, "token_id": "ditto"} @@ -79,7 +88,9 @@ class AuthTestCase(unittest.TestCase): request = Mock(args={}) request.requestHeaders.getRawHeaders = mock_getRawHeaders() d = self.auth.get_user_by_req(request) - self.failureResultOf(d, AuthError) + f = self.failureResultOf(d, MissingClientTokenError).value + self.assertEqual(f.code, 401) + self.assertEqual(f.errcode, "M_MISSING_TOKEN") @defer.inlineCallbacks def test_get_user_by_req_appservice_valid_token(self): @@ -133,7 +144,9 @@ class AuthTestCase(unittest.TestCase): request.args[b"access_token"] = [self.test_token] request.requestHeaders.getRawHeaders = mock_getRawHeaders() d = self.auth.get_user_by_req(request) - self.failureResultOf(d, AuthError) + f = self.failureResultOf(d, InvalidClientTokenError).value + self.assertEqual(f.code, 401) + self.assertEqual(f.errcode, "M_UNKNOWN_TOKEN") def test_get_user_by_req_appservice_bad_token(self): self.store.get_app_service_by_token = 
Mock(return_value=None) @@ -143,7 +156,9 @@ class AuthTestCase(unittest.TestCase): request.args[b"access_token"] = [self.test_token] request.requestHeaders.getRawHeaders = mock_getRawHeaders() d = self.auth.get_user_by_req(request) - self.failureResultOf(d, AuthError) + f = self.failureResultOf(d, InvalidClientTokenError).value + self.assertEqual(f.code, 401) + self.assertEqual(f.errcode, "M_UNKNOWN_TOKEN") def test_get_user_by_req_appservice_missing_token(self): app_service = Mock(token="foobar", url="a_url", sender=self.test_user) @@ -153,7 +168,9 @@ class AuthTestCase(unittest.TestCase): request = Mock(args={}) request.requestHeaders.getRawHeaders = mock_getRawHeaders() d = self.auth.get_user_by_req(request) - self.failureResultOf(d, AuthError) + f = self.failureResultOf(d, MissingClientTokenError).value + self.assertEqual(f.code, 401) + self.assertEqual(f.errcode, "M_MISSING_TOKEN") @defer.inlineCallbacks def test_get_user_by_req_appservice_valid_token_valid_user_id(self): @@ -280,7 +297,7 @@ class AuthTestCase(unittest.TestCase): request.args[b"access_token"] = [guest_tok.encode("ascii")] request.requestHeaders.getRawHeaders = mock_getRawHeaders() - with self.assertRaises(AuthError) as cm: + with self.assertRaises(InvalidClientCredentialsError) as cm: yield self.auth.get_user_by_req(request, allow_guest=True) self.assertEqual(401, cm.exception.code) From 78a1cd36b568c3021b311703df147b68f5fd1c19 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 11 Jul 2019 13:33:23 +0100 Subject: [PATCH 29/80] small typo fix (#5655) --- changelog.d/5655.doc | 1 + synapse/storage/events_worker.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/5655.doc diff --git a/changelog.d/5655.doc b/changelog.d/5655.doc new file mode 100644 index 0000000000..acab6aee92 --- /dev/null +++ b/changelog.d/5655.doc @@ -0,0 +1 @@ +Fix a small typo in a code comment. 
\ No newline at end of file diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 09db872511..874d0a56bc 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -327,7 +327,7 @@ class EventsWorkerStore(SQLBaseStore): Args: events (list(str)): list of event_ids to fetch - allow_rejected (bool): Whether to teturn events that were rejected + allow_rejected (bool): Whether to return events that were rejected update_metrics (bool): Whether to update the cache hit ratio metrics Returns: From 39e9839a04c416ac70cdbd48f33ff574d56c7fbe Mon Sep 17 00:00:00 2001 From: Lrizika Date: Thu, 11 Jul 2019 06:31:36 -0700 Subject: [PATCH 30/80] Improved docs on setting up Postgresql (#5661) Added that synapse_user needs a database to access before it can auth Noted you'll need to enable password auth, linked to pg_hba.conf docs --- changelog.d/5661.docs | 1 + docs/postgres.rst | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) create mode 100644 changelog.d/5661.docs diff --git a/changelog.d/5661.docs b/changelog.d/5661.docs new file mode 100644 index 0000000000..c70e62014e --- /dev/null +++ b/changelog.d/5661.docs @@ -0,0 +1 @@ +Improvements to Postgres setup instructions. Contributed by @Lrizika - thanks! diff --git a/docs/postgres.rst b/docs/postgres.rst index 33f58e3ace..0ae52ccbd8 100644 --- a/docs/postgres.rst +++ b/docs/postgres.rst @@ -34,9 +34,14 @@ Assuming your PostgreSQL database user is called ``postgres``, create a user su - postgres createuser --pwprompt synapse_user -The PostgreSQL database used *must* have the correct encoding set, otherwise it -would not be able to store UTF8 strings. To create a database with the correct -encoding use, e.g.:: +Before you can authenticate with the ``synapse_user``, you must create a +database that it can access. 
To create a database, first connect to the database +with your database user:: + + su - postgres + psql + +and then run:: CREATE DATABASE synapse ENCODING 'UTF8' @@ -46,7 +51,13 @@ encoding use, e.g.:: OWNER synapse_user; This would create an appropriate database named ``synapse`` owned by the -``synapse_user`` user (which must already exist). +``synapse_user`` user (which must already have been created as above). + +Note that the PostgreSQL database *must* have the correct encoding set (as +shown above), otherwise it will not be able to store UTF8 strings. + +You may need to enable password authentication so ``synapse_user`` can connect +to the database. See https://www.postgresql.org/docs/11/auth-pg-hba-conf.html. Tuning Postgres =============== From a83577d64f23c260bd7899a4dee5ff00d1058253 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 11 Jul 2019 23:43:41 +1000 Subject: [PATCH 31/80] Use /src for checking out synapse during sytests (#5664) --- .buildkite/pipeline.yml | 3 +++ changelog.d/5664.misc | 1 + 2 files changed, 4 insertions(+) create mode 100644 changelog.d/5664.misc diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index dd0f98cba0..7f42fad909 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -179,6 +179,7 @@ steps: image: "matrixdotorg/sytest-synapse:py35" propagate-environment: true always-pull: true + workdir: "/src" retry: automatic: - exit_status: -1 @@ -199,6 +200,7 @@ steps: image: "matrixdotorg/sytest-synapse:py35" propagate-environment: true always-pull: true + workdir: "/src" retry: automatic: - exit_status: -1 @@ -220,6 +222,7 @@ steps: image: "matrixdotorg/sytest-synapse:py35" propagate-environment: true always-pull: true + workdir: "/src" soft_fail: true retry: automatic: diff --git a/changelog.d/5664.misc b/changelog.d/5664.misc new file mode 100644 index 0000000000..0ca7a0fbd0 --- /dev/null +++ b/changelog.d/5664.misc @@ -0,0 +1 @@ +Update the sytest BuildKite configuration to checkout Synapse in 
`/src`. From 6bb0357c949dcc433e2316c395199644ef75e02c Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 12 Jul 2019 10:16:23 +0100 Subject: [PATCH 32/80] Add a mechanism for per-test configs (#5657) It's useful to be able to tweak the homeserver config to be used for each test. This PR adds a mechanism to do so. --- changelog.d/5657.misc | 1 + tests/unittest.py | 55 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 changelog.d/5657.misc diff --git a/changelog.d/5657.misc b/changelog.d/5657.misc new file mode 100644 index 0000000000..bdec9ae4c0 --- /dev/null +++ b/changelog.d/5657.misc @@ -0,0 +1 @@ +Add a mechanism for per-test homeserver configuration in the unit tests. diff --git a/tests/unittest.py b/tests/unittest.py index a09e76c7c2..0f0c2ad69d 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -157,6 +157,21 @@ class HomeserverTestCase(TestCase): """ A base TestCase that reduces boilerplate for HomeServer-using test cases. + Defines a setUp method which creates a mock reactor, and instantiates a homeserver + running on that reactor. + + There are various hooks for modifying the way that the homeserver is instantiated: + + * override make_homeserver, for example by making it pass different parameters into + setup_test_homeserver. + + * override default_config, to return a modified configuration dictionary for use + by setup_test_homeserver. + + * On a per-test basis, you can use the @override_config decorator to give a + dictionary containing additional configuration settings to be added to the basic + config dict. + Attributes: servlets (list[function]): List of servlet registration function. user_id (str): The user ID to assume if auth is hijacked. 
@@ -168,6 +183,13 @@ class HomeserverTestCase(TestCase): hijack_auth = True needs_threadpool = False + def __init__(self, methodName, *args, **kwargs): + super().__init__(methodName, *args, **kwargs) + + # see if we have any additional config for this test + method = getattr(self, methodName) + self._extra_config = getattr(method, "_extra_config", None) + def setUp(self): """ Set up the TestCase by calling the homeserver constructor, optionally @@ -276,7 +298,14 @@ class HomeserverTestCase(TestCase): Args: name (str): The homeserver name/domain. """ - return default_config(name) + config = default_config(name) + + # apply any additional config which was specified via the override_config + # decorator. + if self._extra_config is not None: + config.update(self._extra_config) + + return config def prepare(self, reactor, clock, homeserver): """ @@ -534,3 +563,27 @@ class HomeserverTestCase(TestCase): ) self.render(request) self.assertEqual(channel.code, 403, channel.result) + + +def override_config(extra_config): + """A decorator which can be applied to test functions to give additional HS config + + For use + + For example: + + class MyTestCase(HomeserverTestCase): + @override_config({"enable_registration": False, ...}) + def test_foo(self): + ... + + Args: + extra_config(dict): Additional config settings to be merged into the default + config dict before instantiating the test homeserver. + """ + + def decorator(func): + func._extra_config = extra_config + return func + + return decorator From f36916476151309c7e1c00ae35fd3bf551f61d42 Mon Sep 17 00:00:00 2001 From: Slavi Pantaleev Date: Fri, 12 Jul 2019 13:38:25 +0300 Subject: [PATCH 33/80] Upgrade Alpine Linux used in the Docker image (3.8 -> 3.10) (#5619) Alpine Linux 3.8 is still supported, but it seems like it's quite outdated now. While Python should be the same on both, all other libraries, etc., are much newer in Alpine 3.9 and 3.10. 
Signed-off-by: Slavi Pantaleev --- changelog.d/5619.misc | 1 + docker/Dockerfile | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/5619.misc diff --git a/changelog.d/5619.misc b/changelog.d/5619.misc new file mode 100644 index 0000000000..c5e22d2051 --- /dev/null +++ b/changelog.d/5619.misc @@ -0,0 +1 @@ +Base Docker image on a newer Alpine Linux version (3.8 -> 3.10) diff --git a/docker/Dockerfile b/docker/Dockerfile index 79276209f6..e5a0d6d5f6 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -16,7 +16,7 @@ ARG PYTHON_VERSION=3.7 ### ### Stage 0: builder ### -FROM docker.io/python:${PYTHON_VERSION}-alpine3.8 as builder +FROM docker.io/python:${PYTHON_VERSION}-alpine3.10 as builder # install the OS build deps @@ -55,7 +55,7 @@ RUN pip install --prefix="/install" --no-warn-script-location \ ### Stage 1: runtime ### -FROM docker.io/python:${PYTHON_VERSION}-alpine3.8 +FROM docker.io/python:${PYTHON_VERSION}-alpine3.10 # xmlsec is required for saml support RUN apk add --no-cache --virtual .runtime_deps \ From 59f15309ca09c6f46602b47eafd1e5527fff224d Mon Sep 17 00:00:00 2001 From: Slavi Pantaleev Date: Fri, 12 Jul 2019 13:43:42 +0300 Subject: [PATCH 34/80] Add missing space in default logging file format generated by the Docker image (#5620) This adds a missing space, without which log lines appear uglier. 
Signed-off-by: Slavi Pantaleev --- changelog.d/5620.bugfix | 1 + docker/conf/log.config | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/5620.bugfix diff --git a/changelog.d/5620.bugfix b/changelog.d/5620.bugfix new file mode 100644 index 0000000000..17e1f133e8 --- /dev/null +++ b/changelog.d/5620.bugfix @@ -0,0 +1 @@ +Add missing space in default logging file format generated by the Docker image diff --git a/docker/conf/log.config b/docker/conf/log.config index ea5ccfd68b..db35e475a4 100644 --- a/docker/conf/log.config +++ b/docker/conf/log.config @@ -2,7 +2,7 @@ version: 1 formatters: precise: - format: '%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s- %(message)s' + format: '%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s - %(message)s' filters: context: From d445b3ae57e589903ead3563a76f5c4e9322de19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulrik=20G=C3=BCnther?= Date: Fri, 12 Jul 2019 12:46:18 +0200 Subject: [PATCH 35/80] Update reverse_proxy.rst (#5397) Updates reverse_proxy.rst with information about nginx' URI normalisation. --- changelog.d/5397.doc | 1 + docs/reverse_proxy.rst | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 changelog.d/5397.doc diff --git a/changelog.d/5397.doc b/changelog.d/5397.doc new file mode 100644 index 0000000000..c2b500b482 --- /dev/null +++ b/changelog.d/5397.doc @@ -0,0 +1 @@ +Add information about nginx normalisation to reverse_proxy.rst. Contributed by @skalarproduktraum - thanks! diff --git a/docs/reverse_proxy.rst b/docs/reverse_proxy.rst index e4b870411c..4b640ffc4f 100644 --- a/docs/reverse_proxy.rst +++ b/docs/reverse_proxy.rst @@ -48,6 +48,8 @@ Let's assume that we expect clients to connect to our server at proxy_set_header X-Forwarded-For $remote_addr; } } + + Do not add a `/` after the port in `proxy_pass`, otherwise nginx will canonicalise/normalise the URI. 
* Caddy:: From 4c17a87606603a7128066d673a08e9d1765f5556 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 12 Jul 2019 11:47:24 +0100 Subject: [PATCH 36/80] fix changelog name --- changelog.d/{5661.docs => 5661.doc} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename changelog.d/{5661.docs => 5661.doc} (100%) diff --git a/changelog.d/5661.docs b/changelog.d/5661.doc similarity index 100% rename from changelog.d/5661.docs rename to changelog.d/5661.doc From 24aa0e0a5bac33df5a9a2e50a74aa500af320953 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 12 Jul 2019 15:29:32 +0100 Subject: [PATCH 37/80] fix typo: backgroud -> background --- synapse/rest/media/v1/storage_provider.py | 2 +- synapse/storage/registration.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py index e8f559acc1..37687ea7f4 100644 --- a/synapse/rest/media/v1/storage_provider.py +++ b/synapse/rest/media/v1/storage_provider.py @@ -67,7 +67,7 @@ class StorageProviderWrapper(StorageProvider): backend (StorageProvider) store_local (bool): Whether to store new local files or not. store_synchronous (bool): Whether to wait for file to be successfully - uploaded, or todo the upload in the backgroud. + uploaded, or todo the upload in the background. 
store_remote (bool): Whether remote media should be uploaded """ diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 8e217c9408..73580f1725 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -603,7 +603,7 @@ class RegistrationStore( ) self.register_background_update_handler( - "users_set_deactivated_flag", self._backgroud_update_set_deactivated_flag + "users_set_deactivated_flag", self._background_update_set_deactivated_flag ) # Create a background job for culling expired 3PID validity tokens @@ -618,14 +618,14 @@ class RegistrationStore( hs.get_clock().looping_call(start_cull, THIRTY_MINUTES_IN_MS) @defer.inlineCallbacks - def _backgroud_update_set_deactivated_flag(self, progress, batch_size): + def _background_update_set_deactivated_flag(self, progress, batch_size): """Retrieves a list of all deactivated users and sets the 'deactivated' flag to 1 for each of them. """ last_user = progress.get("user_id", "") - def _backgroud_update_set_deactivated_flag_txn(txn): + def _background_update_set_deactivated_flag_txn(txn): txn.execute( """ SELECT @@ -670,7 +670,7 @@ class RegistrationStore( return False end = yield self.runInteraction( - "users_set_deactivated_flag", _backgroud_update_set_deactivated_flag_txn + "users_set_deactivated_flag", _background_update_set_deactivated_flag_txn ) if end: From db0a50bc40e4aca4a03fa3a09d33497f57f95c2d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jul 2019 16:59:59 +0100 Subject: [PATCH 38/80] Fixup docstrings --- synapse/storage/stream.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index f8e3007d67..a0465484df 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -835,7 +835,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): as a list of _EventDictReturn and a token that points to the end of the result set. 
If no events are returned then the end of the stream has been reached (i.e. there are no events between - `from_token` and `to_token`). + `from_token` and `to_token`), or `limit` is zero. """ assert int(limit) >= 0 @@ -912,12 +912,10 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): those that match the filter. Returns: - tuple[list[FrozenEvents], str]: Returns the results as a list of - dicts and a token that points to the end of the result set. The - dicts have the keys "event_id", "topological_ordering" and - "stream_ordering". If no events are returned then the end of the - stream has been reached (i.e. there are no events between - `from_key` and `to_key`). + tuple[list[FrozenEvent], str]: Returns the results as a list of + events and a token that points to the end of the result set. If no + events are returned then the end of the stream has been reached + (i.e. there are no events between `from_key` and `to_key`). """ from_key = RoomStreamToken.parse(from_key) From 5f158ec039e4753959aad9b8d288b3d8cb4959a1 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 12 Jul 2019 17:26:02 +0100 Subject: [PATCH 39/80] Implement access token expiry (#5660) Record how long an access token is valid for, and raise a soft-logout once it expires. 
--- changelog.d/5660.feature | 1 + docs/sample_config.yaml | 11 ++ synapse/api/auth.py | 12 ++ synapse/api/errors.py | 8 +- synapse/config/registration.py | 16 +++ synapse/handlers/auth.py | 17 ++- synapse/handlers/register.py | 39 ++++--- synapse/storage/registration.py | 19 ++- .../schema/delta/55/access_token_expiry.sql | 18 +++ tests/api/test_auth.py | 6 +- tests/handlers/test_auth.py | 20 +++- tests/handlers/test_register.py | 5 +- tests/rest/client/v1/test_login.py | 108 ++++++++++++++++++ tests/storage/test_registration.py | 8 +- 14 files changed, 255 insertions(+), 33 deletions(-) create mode 100644 changelog.d/5660.feature create mode 100644 synapse/storage/schema/delta/55/access_token_expiry.sql diff --git a/changelog.d/5660.feature b/changelog.d/5660.feature new file mode 100644 index 0000000000..82889fdaf1 --- /dev/null +++ b/changelog.d/5660.feature @@ -0,0 +1 @@ +Implement `session_lifetime` configuration option, after which access tokens will expire. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 0462f0a17a..663ff31622 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -786,6 +786,17 @@ uploads_path: "DATADIR/uploads" # renew_at: 1w # renew_email_subject: "Renew your %(app)s account" +# Time that a user's session remains valid for, after they log in. +# +# Note that this is not currently compatible with guest logins. +# +# Note also that this is calculated at login time: changes are not applied +# retrospectively to users who have already logged in. +# +# By default, this is infinite. +# +#session_lifetime: 24h + # The user must provide all of the below types of 3PID when registering. 
# #registrations_require_3pid: diff --git a/synapse/api/auth.py b/synapse/api/auth.py index afc6400948..d9e943c39c 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -319,6 +319,17 @@ class Auth(object): # first look in the database r = yield self._look_up_user_by_access_token(token) if r: + valid_until_ms = r["valid_until_ms"] + if ( + valid_until_ms is not None + and valid_until_ms < self.clock.time_msec() + ): + # there was a valid access token, but it has expired. + # soft-logout the user. + raise InvalidClientTokenError( + msg="Access token has expired", soft_logout=True + ) + defer.returnValue(r) # otherwise it needs to be a valid macaroon @@ -505,6 +516,7 @@ class Auth(object): "token_id": ret.get("token_id", None), "is_guest": False, "device_id": ret.get("device_id"), + "valid_until_ms": ret.get("valid_until_ms"), } defer.returnValue(user_info) diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 41fd04cd54..a6e753c30c 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -245,8 +245,14 @@ class MissingClientTokenError(InvalidClientCredentialsError): class InvalidClientTokenError(InvalidClientCredentialsError): """Raised when we didn't understand the access token in a request""" - def __init__(self, msg="Unrecognised access token"): + def __init__(self, msg="Unrecognised access token", soft_logout=False): super().__init__(msg=msg, errcode="M_UNKNOWN_TOKEN") + self._soft_logout = soft_logout + + def error_dict(self): + d = super().error_dict() + d["soft_logout"] = self._soft_logout + return d class ResourceLimitError(SynapseError): diff --git a/synapse/config/registration.py b/synapse/config/registration.py index b895c4e9f4..34cb11468c 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -84,6 +84,11 @@ class RegistrationConfig(Config): "disable_msisdn_registration", False ) + session_lifetime = config.get("session_lifetime") + if session_lifetime is not None: + session_lifetime = 
self.parse_duration(session_lifetime) + self.session_lifetime = session_lifetime + def generate_config_section(self, generate_secrets=False, **kwargs): if generate_secrets: registration_shared_secret = 'registration_shared_secret: "%s"' % ( @@ -141,6 +146,17 @@ class RegistrationConfig(Config): # renew_at: 1w # renew_email_subject: "Renew your %%(app)s account" + # Time that a user's session remains valid for, after they log in. + # + # Note that this is not currently compatible with guest logins. + # + # Note also that this is calculated at login time: changes are not applied + # retrospectively to users who have already logged in. + # + # By default, this is infinite. + # + #session_lifetime: 24h + # The user must provide all of the below types of 3PID when registering. # #registrations_require_3pid: diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index da312b188e..b74a6e9c62 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -15,6 +15,7 @@ # limitations under the License. import logging +import time import unicodedata import attr @@ -558,7 +559,7 @@ class AuthHandler(BaseHandler): return self.sessions[session_id] @defer.inlineCallbacks - def get_access_token_for_user_id(self, user_id, device_id=None): + def get_access_token_for_user_id(self, user_id, device_id, valid_until_ms): """ Creates a new access token for the user with the given user ID. @@ -572,16 +573,26 @@ class AuthHandler(BaseHandler): device_id (str|None): the device ID to associate with the tokens. None to leave the tokens unassociated with a device (deprecated: we should always have a device ID) + valid_until_ms (int|None): when the token is valid until. None for + no expiry. Returns: The access token for the user's session. Raises: StoreError if there was a problem storing the token. 
""" - logger.info("Logging in user %s on device %s", user_id, device_id) + fmt_expiry = "" + if valid_until_ms is not None: + fmt_expiry = time.strftime( + " until %Y-%m-%d %H:%M:%S", time.localtime(valid_until_ms / 1000.0) + ) + logger.info("Logging in user %s on device %s%s", user_id, device_id, fmt_expiry) + yield self.auth.check_auth_blocking(user_id) access_token = self.macaroon_gen.generate_access_token(user_id) - yield self.store.add_access_token_to_user(user_id, access_token, device_id) + yield self.store.add_access_token_to_user( + user_id, access_token, device_id, valid_until_ms + ) # the device *should* have been registered before we got here; however, # it's possible we raced against a DELETE operation. The thing we diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 420c5cb5bc..bb7cfd71b9 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -84,6 +84,8 @@ class RegistrationHandler(BaseHandler): self.device_handler = hs.get_device_handler() self.pusher_pool = hs.get_pusherpool() + self.session_lifetime = hs.config.session_lifetime + @defer.inlineCallbacks def check_username(self, localpart, guest_access_token=None, assigned_user_id=None): if types.contains_invalid_mxid_characters(localpart): @@ -599,6 +601,8 @@ class RegistrationHandler(BaseHandler): def register_device(self, user_id, device_id, initial_display_name, is_guest=False): """Register a device for a user and generate an access token. + The access token will be limited by the homeserver's session_lifetime config. 
+ Args: user_id (str): full canonical @user:id device_id (str|None): The device ID to check, or None to generate @@ -619,20 +623,29 @@ class RegistrationHandler(BaseHandler): is_guest=is_guest, ) defer.returnValue((r["device_id"], r["access_token"])) - else: - device_id = yield self.device_handler.check_device_registered( - user_id, device_id, initial_display_name - ) - if is_guest: - access_token = self.macaroon_gen.generate_access_token( - user_id, ["guest = true"] - ) - else: - access_token = yield self._auth_handler.get_access_token_for_user_id( - user_id, device_id=device_id - ) - defer.returnValue((device_id, access_token)) + valid_until_ms = None + if self.session_lifetime is not None: + if is_guest: + raise Exception( + "session_lifetime is not currently implemented for guest access" + ) + valid_until_ms = self.clock.time_msec() + self.session_lifetime + + device_id = yield self.device_handler.check_device_registered( + user_id, device_id, initial_display_name + ) + if is_guest: + assert valid_until_ms is None + access_token = self.macaroon_gen.generate_access_token( + user_id, ["guest = true"] + ) + else: + access_token = yield self._auth_handler.get_access_token_for_user_id( + user_id, device_id=device_id, valid_until_ms=valid_until_ms + ) + + defer.returnValue((device_id, access_token)) @defer.inlineCallbacks def post_registration_actions( diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 73580f1725..8b2c2a97ab 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -90,7 +90,8 @@ class RegistrationWorkerStore(SQLBaseStore): token (str): The access token of a user. Returns: defer.Deferred: None, if the token did not match, otherwise dict - including the keys `name`, `is_guest`, `device_id`, `token_id`. + including the keys `name`, `is_guest`, `device_id`, `token_id`, + `valid_until_ms`. 
""" return self.runInteraction( "get_user_by_access_token", self._query_for_auth, token @@ -284,7 +285,7 @@ class RegistrationWorkerStore(SQLBaseStore): def _query_for_auth(self, txn, token): sql = ( "SELECT users.name, users.is_guest, access_tokens.id as token_id," - " access_tokens.device_id" + " access_tokens.device_id, access_tokens.valid_until_ms" " FROM users" " INNER JOIN access_tokens on users.name = access_tokens.user_id" " WHERE token = ?" @@ -679,14 +680,16 @@ class RegistrationStore( defer.returnValue(batch_size) @defer.inlineCallbacks - def add_access_token_to_user(self, user_id, token, device_id=None): + def add_access_token_to_user(self, user_id, token, device_id, valid_until_ms): """Adds an access token for the given user. Args: user_id (str): The user ID. token (str): The new access token to add. device_id (str): ID of the device to associate with the access - token + token + valid_until_ms (int|None): when the token is valid until. None for + no expiry. Raises: StoreError if there was a problem adding this. """ @@ -694,7 +697,13 @@ class RegistrationStore( yield self._simple_insert( "access_tokens", - {"id": next_id, "user_id": user_id, "token": token, "device_id": device_id}, + { + "id": next_id, + "user_id": user_id, + "token": token, + "device_id": device_id, + "valid_until_ms": valid_until_ms, + }, desc="add_access_token_to_user", ) diff --git a/synapse/storage/schema/delta/55/access_token_expiry.sql b/synapse/storage/schema/delta/55/access_token_expiry.sql new file mode 100644 index 0000000000..4590604bfd --- /dev/null +++ b/synapse/storage/schema/delta/55/access_token_expiry.sql @@ -0,0 +1,18 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- when this access token can be used until, in ms since the epoch. NULL means the token +-- never expires. +ALTER TABLE access_tokens ADD COLUMN valid_until_ms BIGINT; diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py index ee92ceeb60..c0cb8ef296 100644 --- a/tests/api/test_auth.py +++ b/tests/api/test_auth.py @@ -262,9 +262,11 @@ class AuthTestCase(unittest.TestCase): self.store.add_access_token_to_user = Mock() token = yield self.hs.handlers.auth_handler.get_access_token_for_user_id( - USER_ID, "DEVICE" + USER_ID, "DEVICE", valid_until_ms=None + ) + self.store.add_access_token_to_user.assert_called_with( + USER_ID, token, "DEVICE", None ) - self.store.add_access_token_to_user.assert_called_with(USER_ID, token, "DEVICE") def get_user(tok): if token != tok: diff --git a/tests/handlers/test_auth.py b/tests/handlers/test_auth.py index b204a0700d..b03103d96f 100644 --- a/tests/handlers/test_auth.py +++ b/tests/handlers/test_auth.py @@ -117,7 +117,9 @@ class AuthTestCase(unittest.TestCase): def test_mau_limits_disabled(self): self.hs.config.limit_usage_by_mau = False # Ensure does not throw exception - yield self.auth_handler.get_access_token_for_user_id("user_a") + yield self.auth_handler.get_access_token_for_user_id( + "user_a", device_id=None, valid_until_ms=None + ) yield self.auth_handler.validate_short_term_login_token_and_get_user_id( self._get_macaroon().serialize() @@ -131,7 +133,9 @@ class AuthTestCase(unittest.TestCase): ) with self.assertRaises(ResourceLimitError): - yield 
self.auth_handler.get_access_token_for_user_id("user_a") + yield self.auth_handler.get_access_token_for_user_id( + "user_a", device_id=None, valid_until_ms=None + ) self.hs.get_datastore().get_monthly_active_count = Mock( return_value=defer.succeed(self.large_number_of_users) @@ -150,7 +154,9 @@ class AuthTestCase(unittest.TestCase): return_value=defer.succeed(self.hs.config.max_mau_value) ) with self.assertRaises(ResourceLimitError): - yield self.auth_handler.get_access_token_for_user_id("user_a") + yield self.auth_handler.get_access_token_for_user_id( + "user_a", device_id=None, valid_until_ms=None + ) self.hs.get_datastore().get_monthly_active_count = Mock( return_value=defer.succeed(self.hs.config.max_mau_value) @@ -166,7 +172,9 @@ class AuthTestCase(unittest.TestCase): self.hs.get_datastore().get_monthly_active_count = Mock( return_value=defer.succeed(self.hs.config.max_mau_value) ) - yield self.auth_handler.get_access_token_for_user_id("user_a") + yield self.auth_handler.get_access_token_for_user_id( + "user_a", device_id=None, valid_until_ms=None + ) self.hs.get_datastore().user_last_seen_monthly_active = Mock( return_value=defer.succeed(self.hs.get_clock().time_msec()) ) @@ -185,7 +193,9 @@ class AuthTestCase(unittest.TestCase): return_value=defer.succeed(self.small_number_of_users) ) # Ensure does not raise exception - yield self.auth_handler.get_access_token_for_user_id("user_a") + yield self.auth_handler.get_access_token_for_user_id( + "user_a", device_id=None, valid_until_ms=None + ) self.hs.get_datastore().get_monthly_active_count = Mock( return_value=defer.succeed(self.small_number_of_users) diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py index 1b7e1dacee..90d0129374 100644 --- a/tests/handlers/test_register.py +++ b/tests/handlers/test_register.py @@ -272,7 +272,10 @@ class RegistrationTestCase(unittest.HomeserverTestCase): ) else: yield self.hs.get_auth_handler().delete_access_tokens_for_user(user_id) - yield 
self.store.add_access_token_to_user(user_id=user_id, token=token) + + yield self.store.add_access_token_to_user( + user_id=user_id, token=token, device_id=None, valid_until_ms=None + ) if displayname is not None: # logger.info("setting user display name: %s -> %s", user_id, displayname) diff --git a/tests/rest/client/v1/test_login.py b/tests/rest/client/v1/test_login.py index 0397f91a9e..eae5411325 100644 --- a/tests/rest/client/v1/test_login.py +++ b/tests/rest/client/v1/test_login.py @@ -2,10 +2,14 @@ import json import synapse.rest.admin from synapse.rest.client.v1 import login +from synapse.rest.client.v2_alpha import devices +from synapse.rest.client.v2_alpha.account import WhoamiRestServlet from tests import unittest +from tests.unittest import override_config LOGIN_URL = b"/_matrix/client/r0/login" +TEST_URL = b"/_matrix/client/r0/account/whoami" class LoginRestServletTestCase(unittest.HomeserverTestCase): @@ -13,6 +17,8 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): servlets = [ synapse.rest.admin.register_servlets_for_client_rest_resource, login.register_servlets, + devices.register_servlets, + lambda hs, http_server: WhoamiRestServlet(hs).register(http_server), ] def make_homeserver(self, reactor, clock): @@ -144,3 +150,105 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): self.render(request) self.assertEquals(channel.result["code"], b"403", channel.result) + + @override_config({"session_lifetime": "24h"}) + def test_soft_logout(self): + self.register_user("kermit", "monkey") + + # we shouldn't be able to make requests without an access token + request, channel = self.make_request(b"GET", TEST_URL) + self.render(request) + self.assertEquals(channel.result["code"], b"401", channel.result) + self.assertEquals(channel.json_body["errcode"], "M_MISSING_TOKEN") + + # log in as normal + params = { + "type": "m.login.password", + "identifier": {"type": "m.id.user", "user": "kermit"}, + "password": "monkey", + } + request, channel = 
self.make_request(b"POST", LOGIN_URL, params) + self.render(request) + + self.assertEquals(channel.code, 200, channel.result) + access_token = channel.json_body["access_token"] + device_id = channel.json_body["device_id"] + + # we should now be able to make requests with the access token + request, channel = self.make_request( + b"GET", TEST_URL, access_token=access_token + ) + self.render(request) + self.assertEquals(channel.code, 200, channel.result) + + # time passes + self.reactor.advance(24 * 3600) + + # ... and we should be soft-logouted + request, channel = self.make_request( + b"GET", TEST_URL, access_token=access_token + ) + self.render(request) + self.assertEquals(channel.code, 401, channel.result) + self.assertEquals(channel.json_body["errcode"], "M_UNKNOWN_TOKEN") + self.assertEquals(channel.json_body["soft_logout"], True) + + # + # test behaviour after deleting the expired device + # + + # we now log in as a different device + access_token_2 = self.login("kermit", "monkey") + + # more requests with the expired token should still return a soft-logout + self.reactor.advance(3600) + request, channel = self.make_request( + b"GET", TEST_URL, access_token=access_token + ) + self.render(request) + self.assertEquals(channel.code, 401, channel.result) + self.assertEquals(channel.json_body["errcode"], "M_UNKNOWN_TOKEN") + self.assertEquals(channel.json_body["soft_logout"], True) + + # ... 
but if we delete that device, it will be a proper logout + self._delete_device(access_token_2, "kermit", "monkey", device_id) + + request, channel = self.make_request( + b"GET", TEST_URL, access_token=access_token + ) + self.render(request) + self.assertEquals(channel.code, 401, channel.result) + self.assertEquals(channel.json_body["errcode"], "M_UNKNOWN_TOKEN") + self.assertEquals(channel.json_body["soft_logout"], False) + + def _delete_device(self, access_token, user_id, password, device_id): + """Perform the UI-Auth to delete a device""" + request, channel = self.make_request( + b"DELETE", "devices/" + device_id, access_token=access_token + ) + self.render(request) + self.assertEquals(channel.code, 401, channel.result) + # check it's a UI-Auth fail + self.assertEqual( + set(channel.json_body.keys()), + {"flows", "params", "session"}, + channel.result, + ) + + auth = { + "type": "m.login.password", + # https://github.com/matrix-org/synapse/issues/5665 + # "identifier": {"type": "m.id.user", "user": user_id}, + "user": user_id, + "password": password, + "session": channel.json_body["session"], + } + + request, channel = self.make_request( + b"DELETE", + "devices/" + device_id, + access_token=access_token, + content={"auth": auth}, + ) + self.render(request) + self.assertEquals(channel.code, 200, channel.result) diff --git a/tests/storage/test_registration.py b/tests/storage/test_registration.py index 9365c4622d..0253c4ac05 100644 --- a/tests/storage/test_registration.py +++ b/tests/storage/test_registration.py @@ -57,7 +57,7 @@ class RegistrationStoreTestCase(unittest.TestCase): def test_add_tokens(self): yield self.store.register_user(self.user_id, self.pwhash) yield self.store.add_access_token_to_user( - self.user_id, self.tokens[1], self.device_id + self.user_id, self.tokens[1], self.device_id, valid_until_ms=None ) result = yield self.store.get_user_by_access_token(self.tokens[1]) @@ -72,9 +72,11 @@ class RegistrationStoreTestCase(unittest.TestCase): def 
test_user_delete_access_tokens(self): # add some tokens yield self.store.register_user(self.user_id, self.pwhash) - yield self.store.add_access_token_to_user(self.user_id, self.tokens[0]) yield self.store.add_access_token_to_user( - self.user_id, self.tokens[1], self.device_id + self.user_id, self.tokens[0], device_id=None, valid_until_ms=None + ) + yield self.store.add_access_token_to_user( + self.user_id, self.tokens[1], self.device_id, valid_until_ms=None ) # now delete some From d336b51331d5cf40a577397e8945223d1949e965 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 12 Jul 2019 17:27:07 +0100 Subject: [PATCH 40/80] Add a `docker` type to the towncrier configuration (#5673) ... and certain other changelog-related fixes --- CONTRIBUTING.rst | 29 +++++++++++++++--------- changelog.d/5619.docker | 1 + changelog.d/5619.misc | 1 - changelog.d/{5620.bugfix => 5620.docker} | 2 +- changelog.d/{5655.doc => 5655.misc} | 0 changelog.d/5673.misc | 1 + pyproject.toml | 5 ++++ 7 files changed, 26 insertions(+), 13 deletions(-) create mode 100644 changelog.d/5619.docker delete mode 100644 changelog.d/5619.misc rename changelog.d/{5620.bugfix => 5620.docker} (80%) rename changelog.d/{5655.doc => 5655.misc} (100%) create mode 100644 changelog.d/5673.misc diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 2c44422a0e..94dc650485 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -30,11 +30,10 @@ use github's pull request workflow to review the contribution, and either ask you to make any refinements needed or merge it and make them ourselves. The changes will then land on master when we next do a release. -We use `CircleCI `_ and `Buildkite -`_ for continuous integration. -Buildkite builds need to be authorised by a maintainer. If your change breaks -the build, this will be shown in GitHub, so please keep an eye on the pull -request for feedback. +We use `Buildkite `_ for +continuous integration. 
Buildkite builds need to be authorised by a +maintainer. If your change breaks the build, this will be shown in GitHub, so +please keep an eye on the pull request for feedback. To run unit tests in a local development environment, you can use: @@ -70,13 +69,21 @@ All changes, even minor ones, need a corresponding changelog / newsfragment entry. These are managed by Towncrier (https://github.com/hawkowl/towncrier). -To create a changelog entry, make a new file in the ``changelog.d`` -file named in the format of ``PRnumber.type``. The type can be -one of ``feature``, ``bugfix``, ``removal`` (also used for -deprecations), or ``misc`` (for internal-only changes). +To create a changelog entry, make a new file in the ``changelog.d`` file named +in the format of ``PRnumber.type``. The type can be one of the following: -The content of the file is your changelog entry, which can contain Markdown -formatting. The entry should end with a full stop ('.') for consistency. +* ``feature``. +* ``bugfix``. +* ``docker`` (for updates to the Docker image). +* ``doc`` (for updates to the documentation). +* ``removal`` (also used for deprecations). +* ``misc`` (for internal-only changes). + +The content of the file is your changelog entry, which should be a short +description of your change in the same style as the rest of our `changelog +`_. The file can +contain Markdown formatting, and should end with a full stop ('.') for +consistency. Adding credits to the changelog is encouraged, we value your contributions and would like to have you shouted out in the release notes! diff --git a/changelog.d/5619.docker b/changelog.d/5619.docker new file mode 100644 index 0000000000..b69e5cc57c --- /dev/null +++ b/changelog.d/5619.docker @@ -0,0 +1 @@ +Base Docker image on a newer Alpine Linux version (3.8 -> 3.10). 
diff --git a/changelog.d/5619.misc b/changelog.d/5619.misc deleted file mode 100644 index c5e22d2051..0000000000 --- a/changelog.d/5619.misc +++ /dev/null @@ -1 +0,0 @@ -Base Docker image on a newer Alpine Linux version (3.8 -> 3.10) diff --git a/changelog.d/5620.bugfix b/changelog.d/5620.docker similarity index 80% rename from changelog.d/5620.bugfix rename to changelog.d/5620.docker index 17e1f133e8..cbb5a75d6a 100644 --- a/changelog.d/5620.bugfix +++ b/changelog.d/5620.docker @@ -1 +1 @@ -Add missing space in default logging file format generated by the Docker image +Add missing space in default logging file format generated by the Docker image. diff --git a/changelog.d/5655.doc b/changelog.d/5655.misc similarity index 100% rename from changelog.d/5655.doc rename to changelog.d/5655.misc diff --git a/changelog.d/5673.misc b/changelog.d/5673.misc new file mode 100644 index 0000000000..1942256358 --- /dev/null +++ b/changelog.d/5673.misc @@ -0,0 +1 @@ +Add a `docker` type to the towncrier configuration. diff --git a/pyproject.toml b/pyproject.toml index ef329aab41..db4a2e41e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,11 @@ name = "Bugfixes" showcontent = true + [[tool.towncrier.type]] + directory = "docker" + name = "Updates to the Docker image" + showcontent = true + [[tool.towncrier.type]] directory = "doc" name = "Improved Documentation" From 18c516698e78004df39ec62c3fc08ff03313ba4e Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 15 Jul 2019 11:45:29 +0100 Subject: [PATCH 41/80] Return a different error from Invalid Password when a user is deactivated (#5674) Return `This account has been deactivated` instead of `Invalid password` when a user is deactivated. 
--- changelog.d/5674.feature | 1 + synapse/api/errors.py | 16 ++++++++++++++++ synapse/handlers/auth.py | 9 +++++++++ 3 files changed, 26 insertions(+) create mode 100644 changelog.d/5674.feature diff --git a/changelog.d/5674.feature b/changelog.d/5674.feature new file mode 100644 index 0000000000..04bdfa4ad5 --- /dev/null +++ b/changelog.d/5674.feature @@ -0,0 +1 @@ +Return "This account has been deactivated" when a deactivated user tries to login. diff --git a/synapse/api/errors.py b/synapse/api/errors.py index a6e753c30c..ad3e262041 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -139,6 +139,22 @@ class ConsentNotGivenError(SynapseError): return cs_error(self.msg, self.errcode, consent_uri=self._consent_uri) +class UserDeactivatedError(SynapseError): + """The error returned to the client when the user attempted to access an + authenticated endpoint, but the account has been deactivated. + """ + + def __init__(self, msg): + """Constructs a UserDeactivatedError + + Args: + msg (str): The human-readable error message + """ + super(UserDeactivatedError, self).__init__( + code=http_client.FORBIDDEN, msg=msg, errcode=Codes.UNKNOWN + ) + + class RegistrationError(SynapseError): """An error raised when a registration event fails.""" diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index b74a6e9c62..d4d6574975 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -35,6 +35,7 @@ from synapse.api.errors import ( LoginError, StoreError, SynapseError, + UserDeactivatedError, ) from synapse.api.ratelimiting import Ratelimiter from synapse.logging.context import defer_to_thread @@ -623,6 +624,7 @@ class AuthHandler(BaseHandler): Raises: LimitExceededError if the ratelimiter's login requests count for this user is too high too proceed. + UserDeactivatedError if a user is found but is deactivated. 
""" self.ratelimit_login_per_account(user_id) res = yield self._find_user_id_and_pwd_hash(user_id) @@ -838,6 +840,13 @@ class AuthHandler(BaseHandler): if not lookupres: defer.returnValue(None) (user_id, password_hash) = lookupres + + # If the password hash is None, the account has likely been deactivated + if not password_hash: + deactivated = yield self.store.get_user_deactivated_status(user_id) + if deactivated: + raise UserDeactivatedError("This account has been deactivated") + result = yield self.validate_hash(password, password_hash) if not result: logger.warn("Failed password login for user %s", user_id) From 823e13ddf49b28eb0dab5d7be3921acc92fd0e44 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 15 Jul 2019 13:15:34 +0100 Subject: [PATCH 42/80] Change add_arguments to be a static method --- synapse/config/_base.py | 32 +++++++++++++++++++++++++++++++- synapse/config/database.py | 3 ++- synapse/config/logger.py | 3 ++- synapse/config/registration.py | 3 ++- synapse/config/server.py | 3 ++- 5 files changed, 39 insertions(+), 5 deletions(-) diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 14d3f7c1fe..e588f82981 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -137,12 +137,42 @@ class Config(object): return file_stream.read() def invoke_all(self, name, *args, **kargs): + """Invoke all instance methods with the given name and arguments in the + class's MRO. + + Args: + name (str): Name of function to invoke + *args + **kwargs + + Returns: + list: The list of the return values from each method called + """ results = [] for cls in type(self).mro(): if name in cls.__dict__: results.append(getattr(cls, name)(self, *args, **kargs)) return results + @classmethod + def invoke_all_static(cls, name, *args, **kargs): + """Invoke all static methods with the given name and arguments in the + class's MRO. 
+ + Args: + name (str): Name of function to invoke + *args + **kwargs + + Returns: + list: The list of the return values from each method called + """ + results = [] + for c in cls.mro(): + if name in c.__dict__: + results.append(getattr(c, name)(*args, **kargs)) + return results + def generate_config( self, config_dir_path, @@ -241,7 +271,7 @@ class Config(object): # We can only invoke `add_arguments` on an actual object, but # `add_arguments` should be side effect free so this is probably fine. - cls().invoke_all("add_arguments", config_parser) + cls.invoke_all_static("add_arguments", config_parser) return config_parser diff --git a/synapse/config/database.py b/synapse/config/database.py index bcb2089dd7..746a6cd1f4 100644 --- a/synapse/config/database.py +++ b/synapse/config/database.py @@ -69,7 +69,8 @@ class DatabaseConfig(Config): if database_path is not None: self.database_config["args"]["database"] = database_path - def add_arguments(self, parser): + @staticmethod + def add_arguments(parser): db_group = parser.add_argument_group("database") db_group.add_argument( "-d", diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 931aec41c0..52cf691227 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -103,7 +103,8 @@ class LoggingConfig(Config): if args.log_file is not None: self.log_file = args.log_file - def add_arguments(cls, parser): + @staticmethod + def add_arguments(parser): logging_group = parser.add_argument_group("logging") logging_group.add_argument( "-v", diff --git a/synapse/config/registration.py b/synapse/config/registration.py index 4a59e6ec90..ee58852515 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -222,7 +222,8 @@ class RegistrationConfig(Config): % locals() ) - def add_arguments(self, parser): + @staticmethod + def add_arguments(parser): reg_group = parser.add_argument_group("registration") reg_group.add_argument( "--enable-registration", diff --git 
a/synapse/config/server.py b/synapse/config/server.py index 2a74dea2ea..080d0630bd 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -639,7 +639,8 @@ class ServerConfig(Config): if args.print_pidfile is not None: self.print_pidfile = args.print_pidfile - def add_arguments(self, parser): + @staticmethod + def add_arguments(parser): server_group = parser.add_argument_group("server") server_group.add_argument( "-D", From 37b524f9718b0faeaaac0dccab62c9a5e270c4a2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 15 Jul 2019 13:19:57 +0100 Subject: [PATCH 43/80] Fix up comments --- synapse/app/admin_cmd.py | 4 ++-- synapse/config/_base.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index bd73c47ae2..e618a62432 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# Copyright 2016 OpenMarket Ltd +# Copyright 2019 Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -109,7 +109,7 @@ def start(config_options): subparser = parser.add_subparsers( title="Admin Commands", - description="Choose and admin command to perform.", + description="Choose an admin command to perform.", required=True, dest="command", metavar="", diff --git a/synapse/config/_base.py b/synapse/config/_base.py index e588f82981..74a7980ebe 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -284,7 +284,7 @@ class Config(object): Used for workers where we want to add extra flags/subcommands. 
Args: - conifg_parser (ArgumentParser) + config_parser (ArgumentParser) argv (list[str]) Returns: From fdefb9e29a7c28e5b218fc8d9745a2ec58bc3d27 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 15 Jul 2019 13:43:25 +0100 Subject: [PATCH 44/80] Move creation of ArgumentParser to caller --- synapse/app/admin_cmd.py | 4 +++- synapse/config/_base.py | 15 +++++---------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index e618a62432..c4d752593a 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -13,6 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import argparse import logging import sys @@ -105,7 +106,8 @@ def export_data_command(hs, user_id, directory): def start(config_options): - parser = HomeServerConfig.create_argument_parser("Synapse Admin Command") + parser = argparse.ArgumentParser(description="Synapse Admin Command") + HomeServerConfig.add_arguments_to_parser(parser) subparser = parser.add_subparsers( title="Admin Commands", diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 74a7980ebe..8c3acff03e 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -231,27 +231,24 @@ class Config(object): Returns: Config object. """ - config_parser = cls.create_argument_parser(description) + config_parser = argparse.ArgumentParser(description=description) + cls.add_arguments_to_parser(config_parser) obj, _ = cls.load_config_with_parser(config_parser, argv) return obj @classmethod - def create_argument_parser(cls, description): - """Create an ArgumentParser instance with all the config flags. + def add_arguments_to_parser(cls, config_parser): + """Adds all the config flags to an ArgumentParser. Doesn't support config-file-generation: used by the worker apps. 
Used for workers where we want to add extra flags/subcommands. Args: - description (str): App description - - Returns: - ArgumentParser + config_parser (ArgumentParser): The parser to add the config flags to """ - config_parser = argparse.ArgumentParser(description=description) config_parser.add_argument( "-c", "--config-path", @@ -273,8 +270,6 @@ class Config(object): # `add_arguments` should be side effect free so this is probably fine. cls.invoke_all_static("add_arguments", config_parser) - return config_parser - @classmethod def load_config_with_parser(cls, config_parser, argv): """Parse the commandline and config files with the given parser From c8f35d8d38c91b3493bd34363096a6e26756d8d7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 15 Jul 2019 13:49:18 +0100 Subject: [PATCH 45/80] Use set_defaults(func=) style --- synapse/app/admin_cmd.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index c4d752593a..611a196e54 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -90,15 +90,17 @@ class AdminCmdReplicationHandler(ReplicationClientHandler): @defer.inlineCallbacks -def export_data_command(hs, user_id, directory): +def export_data_command(hs, args): """Export data for a user. Args: - user_id (str) - directory (str|None): Directory to write output to. Will create a temp - directory if not specified. + hs (HomeServer) + args (argparse.Namespace) """ + user_id = args.user_id + directory = args.output_directory + res = yield hs.get_handlers().admin_handler.exfiltrate_user_data( user_id, FileExfiltrationWriter(user_id, directory=directory) ) @@ -129,6 +131,7 @@ def start(config_options): help="The directory to store the exported data in. Must be emtpy.
Defaults" " to creating a temp directory.", ) + export_data_parser.set_defaults(func=export_data_command) try: config, args = HomeServerConfig.load_config_with_parser(parser, config_options) @@ -173,12 +176,6 @@ def start(config_options): ss.setup() - if args.command == "export-data": - command = lambda: export_data_command(ss, args.user_id, args.output_directory) - else: - # This shouldn't happen. - raise ConfigError("Unknown admin command %s" % (args.command,)) - # We use task.react as the basic run command as it correctly handles tearing # down the reactor when the deferreds resolve and setting the return value. # We also make sure that `_base.start` gets run before we actually run the @@ -188,7 +185,7 @@ def start(config_options): def run(_reactor): with LoggingContext("command"): yield _base.start(ss, []) - yield command() + yield args.func(ss, args) _base.start_worker_reactor( "synapse-admin-cmd", config, run_command=lambda: task.react(run) From 1b2067f53d772de1cadca22bdee176b9509d5b6f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 15 Jul 2019 14:15:22 +0100 Subject: [PATCH 46/80] Add FileExfiltrationWriter --- synapse/app/admin_cmd.py | 70 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 611a196e54..13281d0af9 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -15,7 +15,11 @@ # limitations under the License. 
import argparse import logging +import os import sys +import tempfile + +from canonicaljson import json from twisted.internet import defer, task @@ -24,7 +28,7 @@ from synapse.app import _base from synapse.config._base import ConfigError from synapse.config.homeserver import HomeServerConfig from synapse.config.logger import setup_logging -from synapse.handlers.admin import FileExfiltrationWriter +from synapse.handlers.admin import ExfiltrationWriter from synapse.replication.slave.storage._base import BaseSlavedStore from synapse.replication.slave.storage.account_data import SlavedAccountDataStore from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore @@ -107,6 +111,70 @@ def export_data_command(hs, args): print(res) +class FileExfiltrationWriter(ExfiltrationWriter): + """An ExfiltrationWriter that writes the user's data to a directory. + Returns the directory location on completion. + + Note: This writes to disk on the main reactor thread. + + Args: + user_id (str): The user whose data is being exfiltrated. + directory (str|None): The directory to write the data to, if None then + will write to a temporary directory.
+ """ + + def __init__(self, user_id, directory=None): + self.user_id = user_id + + if directory: + self.base_directory = directory + else: + self.base_directory = tempfile.mkdtemp( + prefix="synapse-exfiltrate__%s__" % (user_id,) + ) + + os.makedirs(self.base_directory, exist_ok=True) + if list(os.listdir(self.base_directory)): + raise Exception("Directory must be empty") + + def write_events(self, room_id, events): + room_directory = os.path.join(self.base_directory, "rooms", room_id) + os.makedirs(room_directory, exist_ok=True) + events_file = os.path.join(room_directory, "events") + + with open(events_file, "a") as f: + for event in events: + print(json.dumps(event.get_pdu_json()), file=f) + + def write_state(self, room_id, event_id, state): + room_directory = os.path.join(self.base_directory, "rooms", room_id) + state_directory = os.path.join(room_directory, "state") + os.makedirs(state_directory, exist_ok=True) + + event_file = os.path.join(state_directory, event_id) + + with open(event_file, "a") as f: + for event in state.values(): + print(json.dumps(event.get_pdu_json()), file=f) + + def write_invite(self, room_id, event, state): + self.write_events(room_id, [event]) + + # We write the invite state somewhere else as they aren't full events + # and are only a subset of the state at the event. 
+ room_directory = os.path.join(self.base_directory, "rooms", room_id) + os.makedirs(room_directory, exist_ok=True) + + invite_state = os.path.join(room_directory, "invite_state") + + with open(invite_state, "a") as f: + for event in state.values(): + print(json.dumps(event), file=f) + + def finished(self): + return self.base_directory + + def start(config_options): parser = argparse.ArgumentParser(description="Synapse Admin Command") HomeServerConfig.add_arguments_to_parser(parser) From eca4f5ac730d9ecf29d25888a40a7264b6041a54 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 15 Jul 2019 14:17:28 +0100 Subject: [PATCH 47/80] s/exfiltrate_user_data/export_user_data/ --- synapse/app/admin_cmd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 13281d0af9..3ff19f18e5 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -105,7 +105,7 @@ def export_data_command(hs, args): user_id = args.user_id directory = args.output_directory - res = yield hs.get_handlers().admin_handler.exfiltrate_user_data( + res = yield hs.get_handlers().admin_handler.export_user_data( user_id, FileExfiltrationWriter(user_id, directory=directory) ) print(res) From 03cc8c4b5d187129904dd76a525f111424c31de0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 15 Jul 2019 14:25:05 +0100 Subject: [PATCH 48/80] Fix invoking add_argument from homeserver.py --- synapse/config/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 8c3acff03e..ba1025f86e 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -468,7 +468,7 @@ class Config(object): formatter_class=argparse.RawDescriptionHelpFormatter, ) - obj.invoke_all("add_arguments", parser) + obj.invoke_all_static("add_arguments", parser) args = parser.parse_args(remaining_args) config_dict = read_config_files(config_files) From 
d0d479c1af78af2204a7ed5aee1e187225a5af90 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jul 2019 09:52:56 +0100 Subject: [PATCH 49/80] Fix typo in synapse/app/admin_cmd.py Co-Authored-By: Aaron Raimist --- synapse/app/admin_cmd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 3ff19f18e5..68802ca751 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -196,7 +196,7 @@ def start(config_options): action="store", metavar="DIRECTORY", required=False, - help="The directory to store the exported data in. Must be emtpy. Defaults" + help="The directory to store the exported data in. Must be empty. Defaults" " to creating a temp directory.", ) export_data_parser.set_defaults(func=export_data_command) From f44354e17f11c2a5949861052db1f49d8b67233b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jul 2019 11:39:13 +0100 Subject: [PATCH 50/80] Clean up arg name and remove lying comment --- synapse/config/_base.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/synapse/config/_base.py b/synapse/config/_base.py index ba1025f86e..6ce5cd07fb 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -266,12 +266,10 @@ class Config(object): " Defaults to the directory containing the last config file", ) - # We can only invoke `add_arguments` on an actual object, but - # `add_arguments` should be side effect free so this is probably fine. cls.invoke_all_static("add_arguments", config_parser) @classmethod - def load_config_with_parser(cls, config_parser, argv): + def load_config_with_parser(cls, parser, argv): """Parse the commandline and config files with the given parser Doesn't support config-file-generation: used by the worker apps. @@ -279,23 +277,23 @@ class Config(object): Used for workers where we want to add extra flags/subcommands. 
Args: - config_parser (ArgumentParser) + parser (ArgumentParser) argv (list[str]) Returns: tuple[HomeServerConfig, argparse.Namespace]: Returns the parsed config object and the parsed argparse.Namespace object from - `config_parser.parse_args(..)` + `parser.parse_args(..)` """ obj = cls() - config_args = config_parser.parse_args(argv) + config_args = parser.parse_args(argv) config_files = find_config_files(search_paths=config_args.config_path) if not config_files: - config_parser.error("Must supply a config file.") + parser.error("Must supply a config file.") if config_args.keys_directory: config_dir_path = config_args.keys_directory From 5ed7853bb018b58e99c6dd3ca91905b877968494 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jul 2019 11:45:57 +0100 Subject: [PATCH 51/80] Remove pointless description --- synapse/app/admin_cmd.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 68802ca751..1fd52a5526 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -181,7 +181,6 @@ def start(config_options): subparser = parser.add_subparsers( title="Admin Commands", - description="Choose an admin command to perform.", required=True, dest="command", metavar="", From 65c5592b8ed11e5ed4c3d6e29aa530d089c463ac Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 17 Jul 2019 16:49:19 +0100 Subject: [PATCH 52/80] Refactor `get_events_as_list` (#5699) A couple of changes here: * get rid of a redundant `allow_rejected` condition - we should already have filtered out any rejected events before we get to that point in the code, and the redundancy is confusing. Instead, let's stick in an assertion just to make double-sure we aren't leaking rejected events by mistake. * factor out a `_get_events_from_cache_or_db` method, which is going to be important for a forthcoming fix to redactions. 
--- changelog.d/5699.bugfix | 1 + synapse/storage/events_worker.py | 118 +++++++++++++++++++------------ 2 files changed, 73 insertions(+), 46 deletions(-) create mode 100644 changelog.d/5699.bugfix diff --git a/changelog.d/5699.bugfix b/changelog.d/5699.bugfix new file mode 100644 index 0000000000..30d5e67f67 --- /dev/null +++ b/changelog.d/5699.bugfix @@ -0,0 +1 @@ +Fix some problems with authenticating redactions in recent room versions. \ No newline at end of file diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 874d0a56bc..6d6bb1d5d3 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -218,37 +218,23 @@ class EventsWorkerStore(SQLBaseStore): if not event_ids: defer.returnValue([]) - event_id_list = event_ids - event_ids = set(event_ids) - - event_entry_map = self._get_events_from_cache( - event_ids, allow_rejected=allow_rejected + # there may be duplicates so we cast the list to a set + event_entry_map = yield self._get_events_from_cache_or_db( + set(event_ids), allow_rejected=allow_rejected ) - missing_events_ids = [e for e in event_ids if e not in event_entry_map] - - if missing_events_ids: - log_ctx = LoggingContext.current_context() - log_ctx.record_event_fetch(len(missing_events_ids)) - - # Note that _enqueue_events is also responsible for turning db rows - # into FrozenEvents (via _get_event_from_row), which involves seeing if - # the events have been redacted, and if so pulling the redaction event out - # of the database to check it. - # - # _enqueue_events is a bit of a rubbish name but naming is hard. 
- missing_events = yield self._enqueue_events( - missing_events_ids, allow_rejected=allow_rejected - ) - - event_entry_map.update(missing_events) - events = [] - for event_id in event_id_list: + for event_id in event_ids: entry = event_entry_map.get(event_id, None) if not entry: continue + if not allow_rejected: + assert not entry.event.rejected_reason, ( + "rejected event returned from _get_events_from_cache_or_db despite " + "allow_rejected=False" + ) + # Starting in room version v3, some redactions need to be rechecked if we # didn't have the redacted event at the time, so we recheck on read # instead. @@ -291,34 +277,74 @@ class EventsWorkerStore(SQLBaseStore): # recheck. entry.event.internal_metadata.recheck_redaction = False else: - # We don't have the event that is being redacted, so we - # assume that the event isn't authorized for now. (If we - # later receive the event, then we will always redact - # it anyway, since we have this redaction) + # We either don't have the event that is being redacted (so we + # assume that the event isn't authorised for now), or the + # senders don't match (so it will never be authorised). Either + # way, we shouldn't return it. 
+ # + # (If we later receive the event, then we will redact it anyway, + # since we have this redaction) continue - if allow_rejected or not entry.event.rejected_reason: - if check_redacted and entry.redacted_event: - event = entry.redacted_event - else: - event = entry.event + if check_redacted and entry.redacted_event: + event = entry.redacted_event + else: + event = entry.event - events.append(event) + events.append(event) - if get_prev_content: - if "replaces_state" in event.unsigned: - prev = yield self.get_event( - event.unsigned["replaces_state"], - get_prev_content=False, - allow_none=True, - ) - if prev: - event.unsigned = dict(event.unsigned) - event.unsigned["prev_content"] = prev.content - event.unsigned["prev_sender"] = prev.sender + if get_prev_content: + if "replaces_state" in event.unsigned: + prev = yield self.get_event( + event.unsigned["replaces_state"], + get_prev_content=False, + allow_none=True, + ) + if prev: + event.unsigned = dict(event.unsigned) + event.unsigned["prev_content"] = prev.content + event.unsigned["prev_sender"] = prev.sender defer.returnValue(events) + @defer.inlineCallbacks + def _get_events_from_cache_or_db(self, event_ids, allow_rejected=False): + """Fetch a bunch of events from the cache or the database. + + If events are pulled from the database, they will be cached for future lookups. 
+ + Args: + event_ids (Iterable[str]): The event_ids of the events to fetch + allow_rejected (bool): Whether to include rejected events + + Returns: + Deferred[Dict[str, _EventCacheEntry]]: + map from event id to result + """ + event_entry_map = self._get_events_from_cache( + event_ids, allow_rejected=allow_rejected + ) + + missing_events_ids = [e for e in event_ids if e not in event_entry_map] + + if missing_events_ids: + log_ctx = LoggingContext.current_context() + log_ctx.record_event_fetch(len(missing_events_ids)) + + # Note that _enqueue_events is also responsible for turning db rows + # into FrozenEvents (via _get_event_from_row), which involves seeing if + # the events have been redacted, and if so pulling the redaction event out + # of the database to check it. + # + # _enqueue_events is a bit of a rubbish name but naming is hard. + missing_events = yield self._enqueue_events( + missing_events_ids, allow_rejected=allow_rejected + ) + + event_entry_map.update(missing_events) + + return event_entry_map + def _invalidate_get_event_cache(self, event_id): self._get_event_cache.invalidate((event_id,)) @@ -326,7 +352,7 @@ class EventsWorkerStore(SQLBaseStore): """Fetch events from the caches Args: - events (list(str)): list of event_ids to fetch + events (Iterable[str]): list of event_ids to fetch allow_rejected (bool): Whether to return events that were rejected update_metrics (bool): Whether to update the cache hit ratio metrics From 375162b3c36482b006d28ab8bf719617945c4b1e Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 17 Jul 2019 16:52:02 +0100 Subject: [PATCH 53/80] Fix redaction authentication (#5700) Ensures that redactions are correctly authenticated for recent room versions. There are a few things going on here: * `_fetch_event_rows` is updated to return a dict rather than a list of rows. 
* Rather than returning multiple copies of an event which was redacted multiple times, it returns the redactions as a list within the dict. * It also returns the actual rejection reason, rather than merely the fact that it was rejected, so that we don't have to query the table again in `_get_event_from_row`. * The redaction handling is factored out of `_get_event_from_row`, and now checks if any of the redactions are valid. --- changelog.d/5700.bugfix | 2 + synapse/storage/events_worker.py | 214 +++++++++++++++++++------------ 2 files changed, 131 insertions(+), 85 deletions(-) create mode 100644 changelog.d/5700.bugfix diff --git a/changelog.d/5700.bugfix b/changelog.d/5700.bugfix new file mode 100644 index 0000000000..51bce8d441 --- /dev/null +++ b/changelog.d/5700.bugfix @@ -0,0 +1,2 @@ +Fix some problems with authenticating redactions in recent room versions. + diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 6d6bb1d5d3..e2fc7bc047 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -37,6 +37,7 @@ from synapse.logging.context import ( ) from synapse.metrics.background_process_metrics import run_as_background_process from synapse.types import get_domain_from_id +from synapse.util import batch_iter from synapse.util.metrics import Measure from ._base import SQLBaseStore @@ -240,27 +241,8 @@ class EventsWorkerStore(SQLBaseStore): # instead. if not allow_rejected and entry.event.type == EventTypes.Redaction: if entry.event.internal_metadata.need_to_check_redaction(): - # XXX: we need to avoid calling get_event here. - # - # The problem is that we end up at this point when an event - # which has been redacted is pulled out of the database by - # _enqueue_events, because _enqueue_events needs to check - # the redaction before it can cache the redacted event. So - # obviously, calling get_event to get the redacted event out - # of the database gives us an infinite loop. 
- # - # For now (quick hack to fix during 0.99 release cycle), we - # just go and fetch the relevant row from the db, but it - # would be nice to think about how we can cache this rather - # than hit the db every time we access a redaction event. - # - # One thought on how to do this: - # 1. split get_events_as_list up so that it is divided into - # (a) get the rawish event from the db/cache, (b) do the - # redaction/rejection filtering - # 2. have _get_event_from_row just call the first half of - # that - + # XXX: we should probably use _get_events_from_cache_or_db here, + # so that we can benefit from caching. orig_sender = yield self._simple_select_one_onecol( table="events", keyvalues={"event_id": entry.event.redacts}, @@ -410,19 +392,16 @@ class EventsWorkerStore(SQLBaseStore): The fetch requests. Each entry consists of a list of event ids to be fetched, and a deferred to be completed once the events have been fetched. - """ with Measure(self._clock, "_fetch_event_list"): try: event_id_lists = list(zip(*event_list))[0] event_ids = [item for sublist in event_id_lists for item in sublist] - rows = self._new_transaction( + row_dict = self._new_transaction( conn, "do_fetch", [], [], self._fetch_event_rows, event_ids ) - row_dict = {r["event_id"]: r for r in rows} - # We only want to resolve deferreds from the main thread def fire(lst, res): for ids, d in lst: @@ -480,7 +459,7 @@ class EventsWorkerStore(SQLBaseStore): logger.debug("Loaded %d events (%d rows)", len(events), len(rows)) if not allow_rejected: - rows[:] = [r for r in rows if not r["rejects"]] + rows[:] = [r for r in rows if r["rejected_reason"] is None] res = yield make_deferred_yieldable( defer.gatherResults( @@ -489,8 +468,8 @@ class EventsWorkerStore(SQLBaseStore): self._get_event_from_row, row["internal_metadata"], row["json"], - row["redacts"], - rejected_reason=row["rejects"], + row["redactions"], + rejected_reason=row["rejected_reason"], format_version=row["format_version"], ) for row in rows @@ 
-501,49 +480,98 @@ class EventsWorkerStore(SQLBaseStore): defer.returnValue({e.event.event_id: e for e in res if e}) - def _fetch_event_rows(self, txn, events): - rows = [] - N = 200 - for i in range(1 + len(events) // N): - evs = events[i * N : (i + 1) * N] - if not evs: - break + def _fetch_event_rows(self, txn, event_ids): + """Fetch event rows from the database + Events which are not found are omitted from the result. + + The returned per-event dicts contain the following keys: + + * event_id (str) + + * json (str): json-encoded event structure + + * internal_metadata (str): json-encoded internal metadata dict + + * format_version (int|None): The format of the event. Hopefully one + of EventFormatVersions. 'None' means the event predates + EventFormatVersions (so the event is format V1). + + * rejected_reason (str|None): if the event was rejected, the reason + why. + + * redactions (List[str]): a list of event-ids which (claim to) redact + this event. + + Args: + txn (twisted.enterprise.adbapi.Connection): + event_ids (Iterable[str]): event IDs to fetch + + Returns: + Dict[str, Dict]: a map from event id to event info. 
+ """ + event_dict = {} + for evs in batch_iter(event_ids, 200): sql = ( "SELECT " - " e.event_id as event_id, " + " e.event_id, " " e.internal_metadata," " e.json," " e.format_version, " - " r.redacts as redacts," - " rej.event_id as rejects " + " rej.reason " " FROM event_json as e" " LEFT JOIN rejections as rej USING (event_id)" - " LEFT JOIN redactions as r ON e.event_id = r.redacts" " WHERE e.event_id IN (%s)" ) % (",".join(["?"] * len(evs)),) txn.execute(sql, evs) - rows.extend(self.cursor_to_dict(txn)) - return rows + for row in txn: + event_id = row[0] + event_dict[event_id] = { + "event_id": event_id, + "internal_metadata": row[1], + "json": row[2], + "format_version": row[3], + "rejected_reason": row[4], + "redactions": [], + } + + # check for redactions + redactions_sql = ( + "SELECT event_id, redacts FROM redactions WHERE redacts IN (%s)" + ) % (",".join(["?"] * len(evs)),) + + txn.execute(redactions_sql, evs) + + for (redacter, redacted) in txn: + d = event_dict.get(redacted) + if d: + d["redactions"].append(redacter) + + return event_dict @defer.inlineCallbacks def _get_event_from_row( - self, internal_metadata, js, redacted, format_version, rejected_reason=None + self, internal_metadata, js, redactions, format_version, rejected_reason=None ): + """Parse an event row which has been read from the database + + Args: + internal_metadata (str): json-encoded internal_metadata column + js (str): json-encoded event body from event_json + redactions (list[str]): a list of the events which claim to have redacted + this event, from the redactions table + format_version: (str): the 'format_version' column + rejected_reason (str|None): the reason this event was rejected, if any + + Returns: + _EventCacheEntry + """ with Measure(self._clock, "_get_event_from_row"): d = json.loads(js) internal_metadata = json.loads(internal_metadata) - if rejected_reason: - rejected_reason = yield self._simple_select_one_onecol( - table="rejections", - keyvalues={"event_id": 
rejected_reason}, - retcol="reason", - desc="_get_event_from_row_rejected_reason", - ) - if format_version is None: # This means that we stored the event before we had the concept # of a event format version, so it must be a V1 event. @@ -555,41 +583,7 @@ class EventsWorkerStore(SQLBaseStore): rejected_reason=rejected_reason, ) - redacted_event = None - if redacted: - redacted_event = prune_event(original_ev) - - redaction_id = yield self._simple_select_one_onecol( - table="redactions", - keyvalues={"redacts": redacted_event.event_id}, - retcol="event_id", - desc="_get_event_from_row_redactions", - ) - - redacted_event.unsigned["redacted_by"] = redaction_id - # Get the redaction event. - - because = yield self.get_event( - redaction_id, check_redacted=False, allow_none=True - ) - - if because: - # It's fine to do add the event directly, since get_pdu_json - # will serialise this field correctly - redacted_event.unsigned["redacted_because"] = because - - # Starting in room version v3, some redactions need to be - # rechecked if we didn't have the redacted event at the - # time, so we recheck on read instead. - if because.internal_metadata.need_to_check_redaction(): - expected_domain = get_domain_from_id(original_ev.sender) - if get_domain_from_id(because.sender) == expected_domain: - # This redaction event is allowed. Mark as not needing a - # recheck. 
- because.internal_metadata.recheck_redaction = False - else: - # Senders don't match, so the event isn't actually redacted - redacted_event = None + redacted_event = yield self._maybe_redact_event_row(original_ev, redactions) cache_entry = _EventCacheEntry( event=original_ev, redacted_event=redacted_event @@ -599,6 +593,56 @@ class EventsWorkerStore(SQLBaseStore): defer.returnValue(cache_entry) + @defer.inlineCallbacks + def _maybe_redact_event_row(self, original_ev, redactions): + """Given an event object and a list of possible redacting event ids, + determine whether to honour any of those redactions and if so return a redacted + event. + + Args: + original_ev (EventBase): + redactions (iterable[str]): list of event ids of potential redaction events + + Returns: + Deferred[EventBase|None]: if the event should be redacted, a pruned + event object. Otherwise, None. + """ + redaction_map = yield self._get_events_from_cache_or_db(redactions) + + for redaction_id in redactions: + redaction_entry = redaction_map.get(redaction_id) + if not redaction_entry: + # we don't have the redaction event, or the redaction event was not + # authorized. + continue + + redaction_event = redaction_entry.event + + # Starting in room version v3, some redactions need to be + # rechecked if we didn't have the redacted event at the + # time, so we recheck on read instead. + if redaction_event.internal_metadata.need_to_check_redaction(): + expected_domain = get_domain_from_id(original_ev.sender) + if get_domain_from_id(redaction_event.sender) == expected_domain: + # This redaction event is allowed. Mark as not needing a recheck. + redaction_event.internal_metadata.recheck_redaction = False + else: + # Senders don't match, so the event isn't actually redacted + continue + + # we found a good redaction event. Redact! 
+ redacted_event = prune_event(original_ev) + redacted_event.unsigned["redacted_by"] = redaction_id + + # It's fine to add the event directly, since get_pdu_json + # will serialise this field correctly + redacted_event.unsigned["redacted_because"] = redaction_event + + return redacted_event + + # no valid redaction found for this event + return None + @defer.inlineCallbacks def have_events_in_timeline(self, event_ids): """Given a list of event ids, check if we have already processed and From 2091c91fdec0c90360992b4dbbd71048b940bcb0 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 17 Jul 2019 17:34:13 +0100 Subject: [PATCH 54/80] More refactoring in `get_events_as_list` (#5707) We can now use `_get_events_from_cache_or_db` rather than going right back to the database, which means that (a) we can benefit from caching, and (b) it opens the way forward to more extensive checks on the original event. We now always require the original event to exist before we will serve up a redaction. --- changelog.d/5707.bugfix | 1 + synapse/storage/events_worker.py | 68 +++++++----- tests/rest/client/test_redactions.py | 159 +++++++++++++++++++++++++++ tests/unittest.py | 1 + 4 files changed, 200 insertions(+), 29 deletions(-) create mode 100644 changelog.d/5707.bugfix create mode 100644 tests/rest/client/test_redactions.py diff --git a/changelog.d/5707.bugfix b/changelog.d/5707.bugfix new file mode 100644 index 0000000000..aa3046c5e1 --- /dev/null +++ b/changelog.d/5707.bugfix @@ -0,0 +1 @@ +Fix some problems with authenticating redactions in recent room versions. 
diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index e2fc7bc047..1d969d70be 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -236,38 +236,48 @@ class EventsWorkerStore(SQLBaseStore): "allow_rejected=False" ) - # Starting in room version v3, some redactions need to be rechecked if we - # didn't have the redacted event at the time, so we recheck on read - # instead. - if not allow_rejected and entry.event.type == EventTypes.Redaction: - if entry.event.internal_metadata.need_to_check_redaction(): - # XXX: we should probably use _get_events_from_cache_or_db here, - # so that we can benefit from caching. - orig_sender = yield self._simple_select_one_onecol( - table="events", - keyvalues={"event_id": entry.event.redacts}, - retcol="sender", - allow_none=True, - ) + # We may not have had the original event when we received a redaction, so + # we have to recheck auth now. - expected_domain = get_domain_from_id(entry.event.sender) - if ( - orig_sender - and get_domain_from_id(orig_sender) == expected_domain - ): - # This redaction event is allowed. Mark as not needing a - # recheck. - entry.event.internal_metadata.recheck_redaction = False - else: - # We either don't have the event that is being redacted (so we - # assume that the event isn't authorised for now), or the - # senders don't match (so it will never be authorised). Either - # way, we shouldn't return it. - # - # (If we later receive the event, then we will redact it anyway, - # since we have this redaction) + if not allow_rejected and entry.event.type == EventTypes.Redaction: + redacted_event_id = entry.event.redacts + event_map = yield self._get_events_from_cache_or_db([redacted_event_id]) + original_event_entry = event_map.get(redacted_event_id) + if not original_event_entry: + # we don't have the redacted event (or it was rejected). 
+ # + # We assume that the redaction isn't authorized for now; if the + # redacted event later turns up, the redaction will be re-checked, + # and if it is found valid, the original will get redacted before it + # is served to the client. + logger.debug( + "Withholding redaction event %s since we don't (yet) have the " + "original %s", + event_id, + redacted_event_id, + ) + continue + + original_event = original_event_entry.event + + if entry.event.internal_metadata.need_to_check_redaction(): + original_domain = get_domain_from_id(original_event.sender) + redaction_domain = get_domain_from_id(entry.event.sender) + if original_domain != redaction_domain: + # the senders don't match, so this is forbidden + logger.info( + "Withholding redaction %s whose sender domain %s doesn't " + "match that of redacted event %s %s", + event_id, + redaction_domain, + redacted_event_id, + original_domain, + ) continue + # Update the cache to save doing the checks again. + entry.event.internal_metadata.recheck_redaction = False + if check_redacted and entry.redacted_event: event = entry.redacted_event else: diff --git a/tests/rest/client/test_redactions.py b/tests/rest/client/test_redactions.py new file mode 100644 index 0000000000..7d5df95855 --- /dev/null +++ b/tests/rest/client/test_redactions.py @@ -0,0 +1,159 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from synapse.rest import admin +from synapse.rest.client.v1 import login, room +from synapse.rest.client.v2_alpha import sync + +from tests.unittest import HomeserverTestCase + + +class RedactionsTestCase(HomeserverTestCase): + """Tests that various redaction events are handled correctly""" + + servlets = [ + admin.register_servlets, + room.register_servlets, + login.register_servlets, + sync.register_servlets, + ] + + def prepare(self, reactor, clock, hs): + # register a couple of users + self.mod_user_id = self.register_user("user1", "pass") + self.mod_access_token = self.login("user1", "pass") + self.other_user_id = self.register_user("otheruser", "pass") + self.other_access_token = self.login("otheruser", "pass") + + # Create a room + self.room_id = self.helper.create_room_as( + self.mod_user_id, tok=self.mod_access_token + ) + + # Invite the other user + self.helper.invite( + room=self.room_id, + src=self.mod_user_id, + tok=self.mod_access_token, + targ=self.other_user_id, + ) + # The other user joins + self.helper.join( + room=self.room_id, user=self.other_user_id, tok=self.other_access_token + ) + + def _redact_event(self, access_token, room_id, event_id, expect_code=200): + """Helper function to send a redaction event. + + Returns the json body. 
+ """ + path = "/_matrix/client/r0/rooms/%s/redact/%s" % (room_id, event_id) + + request, channel = self.make_request( + "POST", path, content={}, access_token=access_token + ) + self.render(request) + self.assertEqual(int(channel.result["code"]), expect_code) + return channel.json_body + + def _sync_room_timeline(self, access_token, room_id): + request, channel = self.make_request( + "GET", "sync", access_token=self.mod_access_token + ) + self.render(request) + self.assertEqual(channel.result["code"], b"200") + room_sync = channel.json_body["rooms"]["join"][room_id] + return room_sync["timeline"]["events"] + + def test_redact_event_as_moderator(self): + # as a regular user, send a message to redact + b = self.helper.send(room_id=self.room_id, tok=self.other_access_token) + msg_id = b["event_id"] + + # as the moderator, send a redaction + b = self._redact_event(self.mod_access_token, self.room_id, msg_id) + redaction_id = b["event_id"] + + # now sync + timeline = self._sync_room_timeline(self.mod_access_token, self.room_id) + + # the last event should be the redaction + self.assertEqual(timeline[-1]["event_id"], redaction_id) + self.assertEqual(timeline[-1]["redacts"], msg_id) + + # and the penultimate should be the redacted original + self.assertEqual(timeline[-2]["event_id"], msg_id) + self.assertEqual(timeline[-2]["unsigned"]["redacted_by"], redaction_id) + self.assertEqual(timeline[-2]["content"], {}) + + def test_redact_event_as_normal(self): + # as a regular user, send a message to redact + b = self.helper.send(room_id=self.room_id, tok=self.other_access_token) + normal_msg_id = b["event_id"] + + # also send one as the admin + b = self.helper.send(room_id=self.room_id, tok=self.mod_access_token) + admin_msg_id = b["event_id"] + + # as a normal, try to redact the admin's event + self._redact_event( + self.other_access_token, self.room_id, admin_msg_id, expect_code=403 + ) + + # now try to redact our own event + b = self._redact_event(self.other_access_token, 
self.room_id, normal_msg_id) + redaction_id = b["event_id"] + + # now sync + timeline = self._sync_room_timeline(self.other_access_token, self.room_id) + + # the last event should be the redaction of the normal event + self.assertEqual(timeline[-1]["event_id"], redaction_id) + self.assertEqual(timeline[-1]["redacts"], normal_msg_id) + + # the penultimate should be the unredacted one from the admin + self.assertEqual(timeline[-2]["event_id"], admin_msg_id) + self.assertNotIn("redacted_by", timeline[-2]["unsigned"]) + self.assertTrue(timeline[-2]["content"]["body"], {}) + + # and the antepenultimate should be the redacted normal + self.assertEqual(timeline[-3]["event_id"], normal_msg_id) + self.assertEqual(timeline[-3]["unsigned"]["redacted_by"], redaction_id) + self.assertEqual(timeline[-3]["content"], {}) + + def test_redact_nonexistent_event(self): + # control case: an existing event + b = self.helper.send(room_id=self.room_id, tok=self.other_access_token) + msg_id = b["event_id"] + b = self._redact_event(self.other_access_token, self.room_id, msg_id) + redaction_id = b["event_id"] + + # room moderators can send redactions for non-existent events + self._redact_event(self.mod_access_token, self.room_id, "$zzz") + + # ... 
but normals cannot + self._redact_event( + self.other_access_token, self.room_id, "$zzz", expect_code=404 + ) + + # when we sync, we should see only the valid redaction + timeline = self._sync_room_timeline(self.other_access_token, self.room_id) + self.assertEqual(timeline[-1]["event_id"], redaction_id) + self.assertEqual(timeline[-1]["redacts"], msg_id) + + # and the penultimate should be the redacted original + self.assertEqual(timeline[-2]["event_id"], msg_id) + self.assertEqual(timeline[-2]["unsigned"]["redacted_by"], redaction_id) + self.assertEqual(timeline[-2]["content"], {}) diff --git a/tests/unittest.py b/tests/unittest.py index cabe787cb4..f5fae21317 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -447,6 +447,7 @@ class HomeserverTestCase(TestCase): # Create the user request, channel = self.make_request("GET", "/_matrix/client/r0/admin/register") self.render(request) + self.assertEqual(channel.code, 200) nonce = channel.json_body["nonce"] want_mac = hmac.new(key=b"shared", digestmod=hashlib.sha1) From 1def298119502a8aaf58ba431286e63a7f38e046 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 17 Jul 2019 17:47:07 +0100 Subject: [PATCH 55/80] Improve `Depends` specs in debian package. (#5675) This is basically a contrived way of adding a `Recommends` on `libpq5`, to fix #5653. The way this is supposed to happen in debhelper is to run `dh_shlibdeps`, which in turn runs `dpkg-shlibdeps`, which spits things out into `debian/<package>.substvars` whence they can later be included by `control`. Previously, we had disabled `dh_shlibdeps`, mostly because `dpkg-shlibdeps` gets confused about PIL's interdependent objects, but that's not really the right thing to do and there is another way to work around that. Since we don't always use postgres, we don't necessarily want a hard Depends on libpq5, so I've actually ended up adding an explicit invocation of `dpkg-shlibdeps` for `psycopg2`.
I've also updated the build-depends list for the package, which was missing a couple of entries. --- changelog.d/5675.doc | 1 + debian/changelog | 3 +++ debian/control | 7 +++++++ debian/rules | 14 ++++++++++++++ docker/Dockerfile-dhvirtualenv | 3 +++ docs/postgres.rst | 8 +++++--- 6 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 changelog.d/5675.doc diff --git a/changelog.d/5675.doc b/changelog.d/5675.doc new file mode 100644 index 0000000000..4cd4d0be1a --- /dev/null +++ b/changelog.d/5675.doc @@ -0,0 +1 @@ +Minor tweaks to postgres documentation. diff --git a/debian/changelog b/debian/changelog index b964cf90a2..8aba444f1d 100644 --- a/debian/changelog +++ b/debian/changelog @@ -3,6 +3,9 @@ matrix-synapse-py3 (1.1.0-1) UNRELEASED; urgency=medium [ Amber Brown ] * Update logging config defaults to match API changes in Synapse. + [ Richard van der Hoff ] + * Add Recommends and Depends for some libraries which you probably want. + -- Erik Johnston Thu, 04 Jul 2019 13:59:02 +0100 matrix-synapse-py3 (1.1.0) stable; urgency=medium diff --git a/debian/control b/debian/control index 4abfa02051..9e679c9d42 100644 --- a/debian/control +++ b/debian/control @@ -2,16 +2,20 @@ Source: matrix-synapse-py3 Section: contrib/python Priority: extra Maintainer: Synapse Packaging team +# keep this list in sync with the build dependencies in docker/Dockerfile-dhvirtualenv. Build-Depends: debhelper (>= 9), dh-systemd, dh-virtualenv (>= 1.1), + libsystemd-dev, + libpq-dev, lsb-release, python3-dev, python3, python3-setuptools, python3-pip, python3-venv, + libsqlite3-dev, tar, Standards-Version: 3.9.8 Homepage: https://github.com/matrix-org/synapse @@ -28,9 +32,12 @@ Depends: debconf, python3-distutils|libpython3-stdlib (<< 3.6), ${misc:Depends}, + ${shlibs:Depends}, ${synapse:pydepends}, # some of our scripts use perl, but none of them are important, # so we put perl:Depends in Suggests rather than Depends. 
+Recommends: + ${shlibs1:Recommends}, Suggests: sqlite3, ${perl:Depends}, diff --git a/debian/rules b/debian/rules index 05cbbdde08..a4d2ce2ba4 100755 --- a/debian/rules +++ b/debian/rules @@ -3,15 +3,29 @@ # Build Debian package using https://github.com/spotify/dh-virtualenv # +# assume we only have one package +PACKAGE_NAME:=`dh_listpackages` + override_dh_systemd_enable: dh_systemd_enable --name=matrix-synapse override_dh_installinit: dh_installinit --name=matrix-synapse +# we don't really want to strip the symbols from our object files. override_dh_strip: override_dh_shlibdeps: + # make the postgres package's dependencies a recommendation + # rather than a hard dependency. + find debian/$(PACKAGE_NAME)/ -path '*/site-packages/psycopg2/*.so' | \ + xargs dpkg-shlibdeps -Tdebian/$(PACKAGE_NAME).substvars \ + -pshlibs1 -dRecommends + + # all the other dependencies can be normal 'Depends' requirements, + # except for PIL's, which is self-contained and which confuses + # dpkg-shlibdeps. + dh_shlibdeps -X site-packages/PIL/.libs -X site-packages/psycopg2 override_dh_virtualenv: ./debian/build_virtualenv diff --git a/docker/Dockerfile-dhvirtualenv b/docker/Dockerfile-dhvirtualenv index ceedbad68a..0117ab8bcc 100644 --- a/docker/Dockerfile-dhvirtualenv +++ b/docker/Dockerfile-dhvirtualenv @@ -43,6 +43,9 @@ RUN cd dh-virtualenv-1.1 && dpkg-buildpackage -us -uc -b FROM ${distro} # Install the build dependencies +# +# NB: keep this list in sync with the list of build-deps in debian/control +# TODO: it would be nice to do that automatically. RUN apt-get update -qq -o Acquire::Languages=none \ && env DEBIAN_FRONTEND=noninteractive apt-get install \ -yqq --no-install-recommends -o Dpkg::Options::=--force-unsafe-io \ diff --git a/docs/postgres.rst b/docs/postgres.rst index 0ae52ccbd8..e08a5116b9 100644 --- a/docs/postgres.rst +++ b/docs/postgres.rst @@ -11,7 +11,9 @@ a postgres database. 
* If you are using the `matrix.org debian/ubuntu packages <../INSTALL.md#matrixorg-packages>`_, - the necessary libraries will already be installed. + the necessary python library will already be installed, but you will need to + ensure the low-level postgres library is installed, which you can do with + ``apt install libpq5``. * For other pre-built packages, please consult the documentation from the relevant package. @@ -34,7 +36,7 @@ Assuming your PostgreSQL database user is called ``postgres``, create a user su - postgres createuser --pwprompt synapse_user -Before you can authenticate with the ``synapse_user``, you must create a +Before you can authenticate with the ``synapse_user``, you must create a database that it can access. To create a database, first connect to the database with your database user:: @@ -53,7 +55,7 @@ and then run:: This would create an appropriate database named ``synapse`` owned by the ``synapse_user`` user (which must already have been created as above). -Note that the PostgreSQL database *must* have the correct encoding set (as +Note that the PostgreSQL database *must* have the correct encoding set (as shown above), otherwise it will not be able to store UTF8 strings. 
You may need to enable password authentication so ``synapse_user`` can connect From 9c70a02a9cddf36521c3d6ae6f72e3b46a5f4c2d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 17 Jul 2019 19:08:02 +0100 Subject: [PATCH 56/80] Ignore redactions of m.room.create events (#5701) --- changelog.d/5701.bugfix | 1 + synapse/api/auth.py | 15 ------------- synapse/handlers/message.py | 33 ++++++++++++++++++++-------- synapse/storage/events_worker.py | 12 ++++++++++ tests/rest/client/test_redactions.py | 20 +++++++++++++++++ 5 files changed, 57 insertions(+), 24 deletions(-) create mode 100644 changelog.d/5701.bugfix diff --git a/changelog.d/5701.bugfix b/changelog.d/5701.bugfix new file mode 100644 index 0000000000..fd2866e16a --- /dev/null +++ b/changelog.d/5701.bugfix @@ -0,0 +1 @@ +Ignore redactions of m.room.create events. diff --git a/synapse/api/auth.py b/synapse/api/auth.py index d9e943c39c..7ce6540bdd 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -606,21 +606,6 @@ class Auth(object): defer.returnValue(auth_ids) - def check_redaction(self, room_version, event, auth_events): - """Check whether the event sender is allowed to redact the target event. - - Returns: - True if the the sender is allowed to redact the target event if the - target event was created by them. - False if the sender is allowed to redact the target event with no - further checks. - - Raises: - AuthError if the event sender is definitely not allowed to redact - the target event. 
- """ - return event_auth.check_redaction(room_version, event, auth_events) - @defer.inlineCallbacks def check_can_change_room_list(self, room_id, user): """Check if the user is allowed to edit the room's entry in the diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index eaeda7a5cb..6d7a987f13 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -23,6 +23,7 @@ from canonicaljson import encode_canonical_json, json from twisted.internet import defer from twisted.internet.defer import succeed +from synapse import event_auth from synapse.api.constants import EventTypes, Membership, RelationTypes from synapse.api.errors import ( AuthError, @@ -784,6 +785,20 @@ class EventCreationHandler(object): event.signatures.update(returned_invite.signatures) if event.type == EventTypes.Redaction: + original_event = yield self.store.get_event( + event.redacts, + check_redacted=False, + get_prev_content=False, + allow_rejected=False, + allow_none=True, + check_room_id=event.room_id, + ) + + # we can make some additional checks now if we have the original event. 
+ if original_event: + if original_event.type == EventTypes.Create: + raise AuthError(403, "Redacting create events is not permitted") + prev_state_ids = yield context.get_prev_state_ids(self.store) auth_events_ids = yield self.auth.compute_auth_events( event, prev_state_ids, for_verification=True @@ -791,18 +806,18 @@ class EventCreationHandler(object): auth_events = yield self.store.get_events(auth_events_ids) auth_events = {(e.type, e.state_key): e for e in auth_events.values()} room_version = yield self.store.get_room_version(event.room_id) - if self.auth.check_redaction(room_version, event, auth_events=auth_events): - original_event = yield self.store.get_event( - event.redacts, - check_redacted=False, - get_prev_content=False, - allow_rejected=False, - allow_none=False, - ) + + if event_auth.check_redaction(room_version, event, auth_events=auth_events): + # this user doesn't have 'redact' rights, so we need to do some more + # checks on the original event. Let's start by checking the original + # event exists. + if not original_event: + raise NotFoundError("Could not find event %s" % (event.redacts,)) + if event.user_id != original_event.user_id: raise AuthError(403, "You don't have permission to redact events") - # We've already checked. + # all the checks are done. event.internal_metadata.recheck_redaction = False if event.type == EventTypes.Create: diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 1d969d70be..858fc755a1 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -259,6 +259,14 @@ class EventsWorkerStore(SQLBaseStore): continue original_event = original_event_entry.event + if original_event.type == EventTypes.Create: + # we never serve redactions of Creates to clients. 
+ logger.info( + "Withholding redaction %s of create event %s", + event_id, + redacted_event_id, + ) + continue if entry.event.internal_metadata.need_to_check_redaction(): original_domain = get_domain_from_id(original_event.sender) @@ -617,6 +625,10 @@ class EventsWorkerStore(SQLBaseStore): Deferred[EventBase|None]: if the event should be redacted, a pruned event object. Otherwise, None. """ + if original_ev.type == "m.room.create": + # we choose to ignore redactions of m.room.create events. + return None + redaction_map = yield self._get_events_from_cache_or_db(redactions) for redaction_id in redactions: diff --git a/tests/rest/client/test_redactions.py b/tests/rest/client/test_redactions.py index 7d5df95855..fe66e397c4 100644 --- a/tests/rest/client/test_redactions.py +++ b/tests/rest/client/test_redactions.py @@ -157,3 +157,23 @@ class RedactionsTestCase(HomeserverTestCase): self.assertEqual(timeline[-2]["event_id"], msg_id) self.assertEqual(timeline[-2]["unsigned"]["redacted_by"], redaction_id) self.assertEqual(timeline[-2]["content"], {}) + + def test_redact_create_event(self): + # control case: an existing event + b = self.helper.send(room_id=self.room_id, tok=self.mod_access_token) + msg_id = b["event_id"] + self._redact_event(self.mod_access_token, self.room_id, msg_id) + + # sync the room, to get the id of the create event + timeline = self._sync_room_timeline(self.other_access_token, self.room_id) + create_event_id = timeline[0]["event_id"] + + # room moderators cannot send redactions for create events + self._redact_event( + self.mod_access_token, self.room_id, create_event_id, expect_code=403 + ) + + # and nor can normals + self._redact_event( + self.other_access_token, self.room_id, create_event_id, expect_code=403 + ) From fa8271c5ac034bdffe080dd6b0591bef7aeadd81 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 18 Jul 2019 11:46:47 +0100 Subject: [PATCH 57/80] Convert 
synapse.federation.transport.server to async (#5689) * Convert BaseFederationServlet._wrap to async Empirically, this fixes some lost stacktraces. It should be safe because the wrapped function is called from JsonResource._async_render, which is already async. * Convert the rest of synapse.federation.transport.server to async We may as well do the whole file while we're here. * changelog * flake8 --- changelog.d/5689.misc | 1 + synapse/federation/transport/server.py | 430 +++++++++++-------------- 2 files changed, 189 insertions(+), 242 deletions(-) create mode 100644 changelog.d/5689.misc diff --git a/changelog.d/5689.misc b/changelog.d/5689.misc new file mode 100644 index 0000000000..8aa3e3f6a2 --- /dev/null +++ b/changelog.d/5689.misc @@ -0,0 +1 @@ +Convert `synapse.federation.transport.server` to `async`. Might improve some stack traces. \ No newline at end of file diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index c45d458d94..663264dec4 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd # Copyright 2018 New Vector Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -18,8 +19,6 @@ import functools import logging import re -from twisted.internet import defer - import synapse import synapse.logging.opentracing as opentracing from synapse.api.errors import Codes, FederationDeniedError, SynapseError @@ -103,8 +102,7 @@ class Authenticator(object): self.federation_domain_whitelist = hs.config.federation_domain_whitelist # A method just so we can pass 'self' as the authenticator to the Servlets - @defer.inlineCallbacks - def authenticate_request(self, request, content): + async def authenticate_request(self, request, content): now = self._clock.time_msec() json_request = { "method": request.method.decode("ascii"), @@ -142,7 +140,7 @@ class Authenticator(object): 401, "Missing Authorization headers", Codes.UNAUTHORIZED ) - yield self.keyring.verify_json_for_server( + await self.keyring.verify_json_for_server( origin, json_request, now, "Incoming request" ) @@ -151,17 +149,16 @@ class Authenticator(object): # If we get a valid signed request from the other side, its probably # alive - retry_timings = yield self.store.get_destination_retry_timings(origin) + retry_timings = await self.store.get_destination_retry_timings(origin) if retry_timings and retry_timings["retry_last_ts"]: run_in_background(self._reset_retry_timings, origin) - defer.returnValue(origin) + return origin - @defer.inlineCallbacks - def _reset_retry_timings(self, origin): + async def _reset_retry_timings(self, origin): try: logger.info("Marking origin %r as up", origin) - yield self.store.set_destination_retry_timings(origin, 0, 0) + await self.store.set_destination_retry_timings(origin, 0, 0) except Exception: logger.exception("Error resetting retry timings on %s", origin) @@ -215,7 +212,8 @@ class BaseFederationServlet(object): match against the request path (excluding the /federation/v1 prefix). The servlet should also implement one or more of on_GET, on_POST, on_PUT, to match - the appropriate HTTP method. 
These methods have the signature: + the appropriate HTTP method. These methods must be *asynchronous* and have the + signature: on_(self, origin, content, query, **kwargs) @@ -235,7 +233,7 @@ class BaseFederationServlet(object): components as specified in the path match regexp. Returns: - Deferred[(int, object)|None]: either (response code, response object) to + Optional[Tuple[int, object]]: either (response code, response object) to return a JSON response, or None if the request has already been handled. Raises: @@ -258,10 +256,9 @@ class BaseFederationServlet(object): authenticator = self.authenticator ratelimiter = self.ratelimiter - @defer.inlineCallbacks @functools.wraps(func) - def new_func(request, *args, **kwargs): - """ A callback which can be passed to HttpServer.RegisterPaths + async def new_func(request, *args, **kwargs): + """A callback which can be passed to HttpServer.RegisterPaths Args: request (twisted.web.http.Request): @@ -270,8 +267,8 @@ class BaseFederationServlet(object): components as specified in the path match regexp. Returns: - Deferred[(int, object)|None]: (response code, response object) as returned - by the callback method. None if the request has already been handled. + Tuple[int, object]|None: (response code, response object) as returned by + the callback method. None if the request has already been handled. 
""" content = None if request.method in [b"PUT", b"POST"]: @@ -279,7 +276,7 @@ class BaseFederationServlet(object): content = parse_json_object_from_request(request) try: - origin = yield authenticator.authenticate_request(request, content) + origin = await authenticator.authenticate_request(request, content) except NoAuthenticationError: origin = None if self.REQUIRE_AUTH: @@ -304,16 +301,16 @@ class BaseFederationServlet(object): ): if origin: with ratelimiter.ratelimit(origin) as d: - yield d - response = yield func( + await d + response = await func( origin, content, request.args, *args, **kwargs ) else: - response = yield func( + response = await func( origin, content, request.args, *args, **kwargs ) - defer.returnValue(response) + return response # Extra logic that functools.wraps() doesn't finish new_func.__self__ = func.__self__ @@ -341,8 +338,7 @@ class FederationSendServlet(BaseFederationServlet): self.server_name = server_name # This is when someone is trying to send us a bunch of data. - @defer.inlineCallbacks - def on_PUT(self, origin, content, query, transaction_id): + async def on_PUT(self, origin, content, query, transaction_id): """ Called on PUT /send// Args: @@ -351,7 +347,7 @@ class FederationSendServlet(BaseFederationServlet): request. This is *not* None. Returns: - Deferred: Results in a tuple of `(code, response)`, where + Tuple of `(code, response)`, where `response` is a python dict to be converted into JSON that is used as the response body. 
""" @@ -380,34 +376,33 @@ class FederationSendServlet(BaseFederationServlet): except Exception as e: logger.exception(e) - defer.returnValue((400, {"error": "Invalid transaction"})) - return + return 400, {"error": "Invalid transaction"} try: - code, response = yield self.handler.on_incoming_transaction( + code, response = await self.handler.on_incoming_transaction( origin, transaction_data ) except Exception: logger.exception("on_incoming_transaction failed") raise - defer.returnValue((code, response)) + return code, response class FederationEventServlet(BaseFederationServlet): PATH = "/event/(?P[^/]*)/?" # This is when someone asks for a data item for a given server data_id pair. - def on_GET(self, origin, content, query, event_id): - return self.handler.on_pdu_request(origin, event_id) + async def on_GET(self, origin, content, query, event_id): + return await self.handler.on_pdu_request(origin, event_id) class FederationStateServlet(BaseFederationServlet): PATH = "/state/(?P[^/]*)/?" # This is when someone asks for all data for a given context. - def on_GET(self, origin, content, query, context): - return self.handler.on_context_state_request( + async def on_GET(self, origin, content, query, context): + return await self.handler.on_context_state_request( origin, context, parse_string_from_args(query, "event_id", None, required=True), @@ -417,8 +412,8 @@ class FederationStateServlet(BaseFederationServlet): class FederationStateIdsServlet(BaseFederationServlet): PATH = "/state_ids/(?P[^/]*)/?" - def on_GET(self, origin, content, query, room_id): - return self.handler.on_state_ids_request( + async def on_GET(self, origin, content, query, room_id): + return await self.handler.on_state_ids_request( origin, room_id, parse_string_from_args(query, "event_id", None, required=True), @@ -428,22 +423,22 @@ class FederationStateIdsServlet(BaseFederationServlet): class FederationBackfillServlet(BaseFederationServlet): PATH = "/backfill/(?P[^/]*)/?" 
- def on_GET(self, origin, content, query, context): + async def on_GET(self, origin, content, query, context): versions = [x.decode("ascii") for x in query[b"v"]] limit = parse_integer_from_args(query, "limit", None) if not limit: - return defer.succeed((400, {"error": "Did not include limit param"})) + return 400, {"error": "Did not include limit param"} - return self.handler.on_backfill_request(origin, context, versions, limit) + return await self.handler.on_backfill_request(origin, context, versions, limit) class FederationQueryServlet(BaseFederationServlet): PATH = "/query/(?P[^/]*)" # This is when we receive a server-server Query - def on_GET(self, origin, content, query, query_type): - return self.handler.on_query_request( + async def on_GET(self, origin, content, query, query_type): + return await self.handler.on_query_request( query_type, {k.decode("utf8"): v[0].decode("utf-8") for k, v in query.items()}, ) @@ -452,8 +447,7 @@ class FederationQueryServlet(BaseFederationServlet): class FederationMakeJoinServlet(BaseFederationServlet): PATH = "/make_join/(?P[^/]*)/(?P[^/]*)" - @defer.inlineCallbacks - def on_GET(self, origin, _content, query, context, user_id): + async def on_GET(self, origin, _content, query, context, user_id): """ Args: origin (unicode): The authenticated server_name of the calling server @@ -466,8 +460,7 @@ class FederationMakeJoinServlet(BaseFederationServlet): components as specified in the path match regexp. Returns: - Deferred[(int, object)|None]: either (response code, response object) to - return a JSON response, or None if the request has already been handled. 
+ Tuple[int, object]: (response code, response object) """ versions = query.get(b"ver") if versions is not None: @@ -475,64 +468,60 @@ class FederationMakeJoinServlet(BaseFederationServlet): else: supported_versions = ["1"] - content = yield self.handler.on_make_join_request( + content = await self.handler.on_make_join_request( origin, context, user_id, supported_versions=supported_versions ) - defer.returnValue((200, content)) + return 200, content class FederationMakeLeaveServlet(BaseFederationServlet): PATH = "/make_leave/(?P[^/]*)/(?P[^/]*)" - @defer.inlineCallbacks - def on_GET(self, origin, content, query, context, user_id): - content = yield self.handler.on_make_leave_request(origin, context, user_id) - defer.returnValue((200, content)) + async def on_GET(self, origin, content, query, context, user_id): + content = await self.handler.on_make_leave_request(origin, context, user_id) + return 200, content class FederationSendLeaveServlet(BaseFederationServlet): PATH = "/send_leave/(?P[^/]*)/(?P[^/]*)" - @defer.inlineCallbacks - def on_PUT(self, origin, content, query, room_id, event_id): - content = yield self.handler.on_send_leave_request(origin, content, room_id) - defer.returnValue((200, content)) + async def on_PUT(self, origin, content, query, room_id, event_id): + content = await self.handler.on_send_leave_request(origin, content, room_id) + return 200, content class FederationEventAuthServlet(BaseFederationServlet): PATH = "/event_auth/(?P[^/]*)/(?P[^/]*)" - def on_GET(self, origin, content, query, context, event_id): - return self.handler.on_event_auth(origin, context, event_id) + async def on_GET(self, origin, content, query, context, event_id): + return await self.handler.on_event_auth(origin, context, event_id) class FederationSendJoinServlet(BaseFederationServlet): PATH = "/send_join/(?P[^/]*)/(?P[^/]*)" - @defer.inlineCallbacks - def on_PUT(self, origin, content, query, context, event_id): + async def on_PUT(self, origin, content, query, context, 
event_id): # TODO(paul): assert that context/event_id parsed from path actually # match those given in content - content = yield self.handler.on_send_join_request(origin, content, context) - defer.returnValue((200, content)) + content = await self.handler.on_send_join_request(origin, content, context) + return 200, content class FederationV1InviteServlet(BaseFederationServlet): PATH = "/invite/(?P[^/]*)/(?P[^/]*)" - @defer.inlineCallbacks - def on_PUT(self, origin, content, query, context, event_id): + async def on_PUT(self, origin, content, query, context, event_id): # We don't get a room version, so we have to assume its EITHER v1 or # v2. This is "fine" as the only difference between V1 and V2 is the # state resolution algorithm, and we don't use that for processing # invites - content = yield self.handler.on_invite_request( + content = await self.handler.on_invite_request( origin, content, room_version=RoomVersions.V1.identifier ) # V1 federation API is defined to return a content of `[200, {...}]` # due to a historical bug. 
- defer.returnValue((200, (200, content))) + return 200, (200, content) class FederationV2InviteServlet(BaseFederationServlet): @@ -540,8 +529,7 @@ class FederationV2InviteServlet(BaseFederationServlet): PREFIX = FEDERATION_V2_PREFIX - @defer.inlineCallbacks - def on_PUT(self, origin, content, query, context, event_id): + async def on_PUT(self, origin, content, query, context, event_id): # TODO(paul): assert that context/event_id parsed from path actually # match those given in content @@ -554,69 +542,65 @@ class FederationV2InviteServlet(BaseFederationServlet): event.setdefault("unsigned", {})["invite_room_state"] = invite_room_state - content = yield self.handler.on_invite_request( + content = await self.handler.on_invite_request( origin, event, room_version=room_version ) - defer.returnValue((200, content)) + return 200, content class FederationThirdPartyInviteExchangeServlet(BaseFederationServlet): PATH = "/exchange_third_party_invite/(?P[^/]*)" - @defer.inlineCallbacks - def on_PUT(self, origin, content, query, room_id): - content = yield self.handler.on_exchange_third_party_invite_request( + async def on_PUT(self, origin, content, query, room_id): + content = await self.handler.on_exchange_third_party_invite_request( origin, room_id, content ) - defer.returnValue((200, content)) + return 200, content class FederationClientKeysQueryServlet(BaseFederationServlet): PATH = "/user/keys/query" - def on_POST(self, origin, content, query): - return self.handler.on_query_client_keys(origin, content) + async def on_POST(self, origin, content, query): + return await self.handler.on_query_client_keys(origin, content) class FederationUserDevicesQueryServlet(BaseFederationServlet): PATH = "/user/devices/(?P[^/]*)" - def on_GET(self, origin, content, query, user_id): - return self.handler.on_query_user_devices(origin, user_id) + async def on_GET(self, origin, content, query, user_id): + return await self.handler.on_query_user_devices(origin, user_id) class 
FederationClientKeysClaimServlet(BaseFederationServlet): PATH = "/user/keys/claim" - @defer.inlineCallbacks - def on_POST(self, origin, content, query): - response = yield self.handler.on_claim_client_keys(origin, content) - defer.returnValue((200, response)) + async def on_POST(self, origin, content, query): + response = await self.handler.on_claim_client_keys(origin, content) + return 200, response class FederationQueryAuthServlet(BaseFederationServlet): PATH = "/query_auth/(?P[^/]*)/(?P[^/]*)" - @defer.inlineCallbacks - def on_POST(self, origin, content, query, context, event_id): - new_content = yield self.handler.on_query_auth_request( + async def on_POST(self, origin, content, query, context, event_id): + new_content = await self.handler.on_query_auth_request( origin, content, context, event_id ) - defer.returnValue((200, new_content)) + return 200, new_content class FederationGetMissingEventsServlet(BaseFederationServlet): # TODO(paul): Why does this path alone end with "/?" optional? PATH = "/get_missing_events/(?P[^/]*)/?" 
- @defer.inlineCallbacks - def on_POST(self, origin, content, query, room_id): + async def on_POST(self, origin, content, query, room_id): limit = int(content.get("limit", 10)) earliest_events = content.get("earliest_events", []) latest_events = content.get("latest_events", []) - content = yield self.handler.on_get_missing_events( + content = await self.handler.on_get_missing_events( origin, room_id=room_id, earliest_events=earliest_events, @@ -624,7 +608,7 @@ class FederationGetMissingEventsServlet(BaseFederationServlet): limit=limit, ) - defer.returnValue((200, content)) + return 200, content class On3pidBindServlet(BaseFederationServlet): @@ -632,8 +616,7 @@ class On3pidBindServlet(BaseFederationServlet): REQUIRE_AUTH = False - @defer.inlineCallbacks - def on_POST(self, origin, content, query): + async def on_POST(self, origin, content, query): if "invites" in content: last_exception = None for invite in content["invites"]: @@ -645,7 +628,7 @@ class On3pidBindServlet(BaseFederationServlet): ) logger.info(message) raise SynapseError(400, message) - yield self.handler.exchange_third_party_invite( + await self.handler.exchange_third_party_invite( invite["sender"], invite["mxid"], invite["room_id"], @@ -655,7 +638,7 @@ class On3pidBindServlet(BaseFederationServlet): last_exception = e if last_exception: raise last_exception - defer.returnValue((200, {})) + return 200, {} class OpenIdUserInfo(BaseFederationServlet): @@ -679,29 +662,26 @@ class OpenIdUserInfo(BaseFederationServlet): REQUIRE_AUTH = False - @defer.inlineCallbacks - def on_GET(self, origin, content, query): + async def on_GET(self, origin, content, query): token = query.get(b"access_token", [None])[0] if token is None: - defer.returnValue( - (401, {"errcode": "M_MISSING_TOKEN", "error": "Access Token required"}) + return ( + 401, + {"errcode": "M_MISSING_TOKEN", "error": "Access Token required"}, ) - return - user_id = yield self.handler.on_openid_userinfo(token.decode("ascii")) + user_id = await 
self.handler.on_openid_userinfo(token.decode("ascii")) if user_id is None: - defer.returnValue( - ( - 401, - { - "errcode": "M_UNKNOWN_TOKEN", - "error": "Access Token unknown or expired", - }, - ) + return ( + 401, + { + "errcode": "M_UNKNOWN_TOKEN", + "error": "Access Token unknown or expired", + }, ) - defer.returnValue((200, {"sub": user_id})) + return 200, {"sub": user_id} class PublicRoomList(BaseFederationServlet): @@ -743,8 +723,7 @@ class PublicRoomList(BaseFederationServlet): ) self.allow_access = allow_access - @defer.inlineCallbacks - def on_GET(self, origin, content, query): + async def on_GET(self, origin, content, query): if not self.allow_access: raise FederationDeniedError(origin) @@ -764,10 +743,10 @@ class PublicRoomList(BaseFederationServlet): else: network_tuple = ThirdPartyInstanceID(None, None) - data = yield self.handler.get_local_public_room_list( + data = await self.handler.get_local_public_room_list( limit, since_token, network_tuple=network_tuple, from_federation=True ) - defer.returnValue((200, data)) + return 200, data class FederationVersionServlet(BaseFederationServlet): @@ -775,12 +754,10 @@ class FederationVersionServlet(BaseFederationServlet): REQUIRE_AUTH = False - def on_GET(self, origin, content, query): - return defer.succeed( - ( - 200, - {"server": {"name": "Synapse", "version": get_version_string(synapse)}}, - ) + async def on_GET(self, origin, content, query): + return ( + 200, + {"server": {"name": "Synapse", "version": get_version_string(synapse)}}, ) @@ -790,41 +767,38 @@ class FederationGroupsProfileServlet(BaseFederationServlet): PATH = "/groups/(?P[^/]*)/profile" - @defer.inlineCallbacks - def on_GET(self, origin, content, query, group_id): + async def on_GET(self, origin, content, query, group_id): requester_user_id = parse_string_from_args(query, "requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, "requester_user_id doesn't match origin") - new_content = yield 
self.handler.get_group_profile(group_id, requester_user_id) + new_content = await self.handler.get_group_profile(group_id, requester_user_id) - defer.returnValue((200, new_content)) + return 200, new_content - @defer.inlineCallbacks - def on_POST(self, origin, content, query, group_id): + async def on_POST(self, origin, content, query, group_id): requester_user_id = parse_string_from_args(query, "requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, "requester_user_id doesn't match origin") - new_content = yield self.handler.update_group_profile( + new_content = await self.handler.update_group_profile( group_id, requester_user_id, content ) - defer.returnValue((200, new_content)) + return 200, new_content class FederationGroupsSummaryServlet(BaseFederationServlet): PATH = "/groups/(?P[^/]*)/summary" - @defer.inlineCallbacks - def on_GET(self, origin, content, query, group_id): + async def on_GET(self, origin, content, query, group_id): requester_user_id = parse_string_from_args(query, "requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, "requester_user_id doesn't match origin") - new_content = yield self.handler.get_group_summary(group_id, requester_user_id) + new_content = await self.handler.get_group_summary(group_id, requester_user_id) - defer.returnValue((200, new_content)) + return 200, new_content class FederationGroupsRoomsServlet(BaseFederationServlet): @@ -833,15 +807,14 @@ class FederationGroupsRoomsServlet(BaseFederationServlet): PATH = "/groups/(?P[^/]*)/rooms" - @defer.inlineCallbacks - def on_GET(self, origin, content, query, group_id): + async def on_GET(self, origin, content, query, group_id): requester_user_id = parse_string_from_args(query, "requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, "requester_user_id doesn't match origin") - new_content = yield self.handler.get_rooms_in_group(group_id, requester_user_id) + 
new_content = await self.handler.get_rooms_in_group(group_id, requester_user_id) - defer.returnValue((200, new_content)) + return 200, new_content class FederationGroupsAddRoomsServlet(BaseFederationServlet): @@ -850,29 +823,27 @@ class FederationGroupsAddRoomsServlet(BaseFederationServlet): PATH = "/groups/(?P[^/]*)/room/(?P[^/]*)" - @defer.inlineCallbacks - def on_POST(self, origin, content, query, group_id, room_id): + async def on_POST(self, origin, content, query, group_id, room_id): requester_user_id = parse_string_from_args(query, "requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, "requester_user_id doesn't match origin") - new_content = yield self.handler.add_room_to_group( + new_content = await self.handler.add_room_to_group( group_id, requester_user_id, room_id, content ) - defer.returnValue((200, new_content)) + return 200, new_content - @defer.inlineCallbacks - def on_DELETE(self, origin, content, query, group_id, room_id): + async def on_DELETE(self, origin, content, query, group_id, room_id): requester_user_id = parse_string_from_args(query, "requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, "requester_user_id doesn't match origin") - new_content = yield self.handler.remove_room_from_group( + new_content = await self.handler.remove_room_from_group( group_id, requester_user_id, room_id ) - defer.returnValue((200, new_content)) + return 200, new_content class FederationGroupsAddRoomsConfigServlet(BaseFederationServlet): @@ -884,17 +855,16 @@ class FederationGroupsAddRoomsConfigServlet(BaseFederationServlet): "/config/(?P[^/]*)" ) - @defer.inlineCallbacks - def on_POST(self, origin, content, query, group_id, room_id, config_key): + async def on_POST(self, origin, content, query, group_id, room_id, config_key): requester_user_id = parse_string_from_args(query, "requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, 
"requester_user_id doesn't match origin") - result = yield self.groups_handler.update_room_in_group( + result = await self.groups_handler.update_room_in_group( group_id, requester_user_id, room_id, config_key, content ) - defer.returnValue((200, result)) + return 200, result class FederationGroupsUsersServlet(BaseFederationServlet): @@ -903,15 +873,14 @@ class FederationGroupsUsersServlet(BaseFederationServlet): PATH = "/groups/(?P[^/]*)/users" - @defer.inlineCallbacks - def on_GET(self, origin, content, query, group_id): + async def on_GET(self, origin, content, query, group_id): requester_user_id = parse_string_from_args(query, "requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, "requester_user_id doesn't match origin") - new_content = yield self.handler.get_users_in_group(group_id, requester_user_id) + new_content = await self.handler.get_users_in_group(group_id, requester_user_id) - defer.returnValue((200, new_content)) + return 200, new_content class FederationGroupsInvitedUsersServlet(BaseFederationServlet): @@ -920,17 +889,16 @@ class FederationGroupsInvitedUsersServlet(BaseFederationServlet): PATH = "/groups/(?P[^/]*)/invited_users" - @defer.inlineCallbacks - def on_GET(self, origin, content, query, group_id): + async def on_GET(self, origin, content, query, group_id): requester_user_id = parse_string_from_args(query, "requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, "requester_user_id doesn't match origin") - new_content = yield self.handler.get_invited_users_in_group( + new_content = await self.handler.get_invited_users_in_group( group_id, requester_user_id ) - defer.returnValue((200, new_content)) + return 200, new_content class FederationGroupsInviteServlet(BaseFederationServlet): @@ -939,17 +907,16 @@ class FederationGroupsInviteServlet(BaseFederationServlet): PATH = "/groups/(?P[^/]*)/users/(?P[^/]*)/invite" - @defer.inlineCallbacks - def on_POST(self, origin, 
content, query, group_id, user_id): + async def on_POST(self, origin, content, query, group_id, user_id): requester_user_id = parse_string_from_args(query, "requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, "requester_user_id doesn't match origin") - new_content = yield self.handler.invite_to_group( + new_content = await self.handler.invite_to_group( group_id, user_id, requester_user_id, content ) - defer.returnValue((200, new_content)) + return 200, new_content class FederationGroupsAcceptInviteServlet(BaseFederationServlet): @@ -958,14 +925,13 @@ class FederationGroupsAcceptInviteServlet(BaseFederationServlet): PATH = "/groups/(?P[^/]*)/users/(?P[^/]*)/accept_invite" - @defer.inlineCallbacks - def on_POST(self, origin, content, query, group_id, user_id): + async def on_POST(self, origin, content, query, group_id, user_id): if get_domain_from_id(user_id) != origin: raise SynapseError(403, "user_id doesn't match origin") - new_content = yield self.handler.accept_invite(group_id, user_id, content) + new_content = await self.handler.accept_invite(group_id, user_id, content) - defer.returnValue((200, new_content)) + return 200, new_content class FederationGroupsJoinServlet(BaseFederationServlet): @@ -974,14 +940,13 @@ class FederationGroupsJoinServlet(BaseFederationServlet): PATH = "/groups/(?P[^/]*)/users/(?P[^/]*)/join" - @defer.inlineCallbacks - def on_POST(self, origin, content, query, group_id, user_id): + async def on_POST(self, origin, content, query, group_id, user_id): if get_domain_from_id(user_id) != origin: raise SynapseError(403, "user_id doesn't match origin") - new_content = yield self.handler.join_group(group_id, user_id, content) + new_content = await self.handler.join_group(group_id, user_id, content) - defer.returnValue((200, new_content)) + return 200, new_content class FederationGroupsRemoveUserServlet(BaseFederationServlet): @@ -990,17 +955,16 @@ class 
FederationGroupsRemoveUserServlet(BaseFederationServlet): PATH = "/groups/(?P[^/]*)/users/(?P[^/]*)/remove" - @defer.inlineCallbacks - def on_POST(self, origin, content, query, group_id, user_id): + async def on_POST(self, origin, content, query, group_id, user_id): requester_user_id = parse_string_from_args(query, "requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, "requester_user_id doesn't match origin") - new_content = yield self.handler.remove_user_from_group( + new_content = await self.handler.remove_user_from_group( group_id, user_id, requester_user_id, content ) - defer.returnValue((200, new_content)) + return 200, new_content class FederationGroupsLocalInviteServlet(BaseFederationServlet): @@ -1009,14 +973,13 @@ class FederationGroupsLocalInviteServlet(BaseFederationServlet): PATH = "/groups/local/(?P[^/]*)/users/(?P[^/]*)/invite" - @defer.inlineCallbacks - def on_POST(self, origin, content, query, group_id, user_id): + async def on_POST(self, origin, content, query, group_id, user_id): if get_domain_from_id(group_id) != origin: raise SynapseError(403, "group_id doesn't match origin") - new_content = yield self.handler.on_invite(group_id, user_id, content) + new_content = await self.handler.on_invite(group_id, user_id, content) - defer.returnValue((200, new_content)) + return 200, new_content class FederationGroupsRemoveLocalUserServlet(BaseFederationServlet): @@ -1025,16 +988,15 @@ class FederationGroupsRemoveLocalUserServlet(BaseFederationServlet): PATH = "/groups/local/(?P[^/]*)/users/(?P[^/]*)/remove" - @defer.inlineCallbacks - def on_POST(self, origin, content, query, group_id, user_id): + async def on_POST(self, origin, content, query, group_id, user_id): if get_domain_from_id(group_id) != origin: raise SynapseError(403, "user_id doesn't match origin") - new_content = yield self.handler.user_removed_from_group( + new_content = await self.handler.user_removed_from_group( group_id, user_id, content ) - 
defer.returnValue((200, new_content)) + return 200, new_content class FederationGroupsRenewAttestaionServlet(BaseFederationServlet): @@ -1043,15 +1005,14 @@ class FederationGroupsRenewAttestaionServlet(BaseFederationServlet): PATH = "/groups/(?P[^/]*)/renew_attestation/(?P[^/]*)" - @defer.inlineCallbacks - def on_POST(self, origin, content, query, group_id, user_id): + async def on_POST(self, origin, content, query, group_id, user_id): # We don't need to check auth here as we check the attestation signatures - new_content = yield self.handler.on_renew_attestation( + new_content = await self.handler.on_renew_attestation( group_id, user_id, content ) - defer.returnValue((200, new_content)) + return 200, new_content class FederationGroupsSummaryRoomsServlet(BaseFederationServlet): @@ -1068,8 +1029,7 @@ class FederationGroupsSummaryRoomsServlet(BaseFederationServlet): "/rooms/(?P[^/]*)" ) - @defer.inlineCallbacks - def on_POST(self, origin, content, query, group_id, category_id, room_id): + async def on_POST(self, origin, content, query, group_id, category_id, room_id): requester_user_id = parse_string_from_args(query, "requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, "requester_user_id doesn't match origin") @@ -1077,7 +1037,7 @@ class FederationGroupsSummaryRoomsServlet(BaseFederationServlet): if category_id == "": raise SynapseError(400, "category_id cannot be empty string") - resp = yield self.handler.update_group_summary_room( + resp = await self.handler.update_group_summary_room( group_id, requester_user_id, room_id=room_id, @@ -1085,10 +1045,9 @@ class FederationGroupsSummaryRoomsServlet(BaseFederationServlet): content=content, ) - defer.returnValue((200, resp)) + return 200, resp - @defer.inlineCallbacks - def on_DELETE(self, origin, content, query, group_id, category_id, room_id): + async def on_DELETE(self, origin, content, query, group_id, category_id, room_id): requester_user_id = parse_string_from_args(query, 
"requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, "requester_user_id doesn't match origin") @@ -1096,11 +1055,11 @@ class FederationGroupsSummaryRoomsServlet(BaseFederationServlet): if category_id == "": raise SynapseError(400, "category_id cannot be empty string") - resp = yield self.handler.delete_group_summary_room( + resp = await self.handler.delete_group_summary_room( group_id, requester_user_id, room_id=room_id, category_id=category_id ) - defer.returnValue((200, resp)) + return 200, resp class FederationGroupsCategoriesServlet(BaseFederationServlet): @@ -1109,15 +1068,14 @@ class FederationGroupsCategoriesServlet(BaseFederationServlet): PATH = "/groups/(?P[^/]*)/categories/?" - @defer.inlineCallbacks - def on_GET(self, origin, content, query, group_id): + async def on_GET(self, origin, content, query, group_id): requester_user_id = parse_string_from_args(query, "requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, "requester_user_id doesn't match origin") - resp = yield self.handler.get_group_categories(group_id, requester_user_id) + resp = await self.handler.get_group_categories(group_id, requester_user_id) - defer.returnValue((200, resp)) + return 200, resp class FederationGroupsCategoryServlet(BaseFederationServlet): @@ -1126,20 +1084,18 @@ class FederationGroupsCategoryServlet(BaseFederationServlet): PATH = "/groups/(?P[^/]*)/categories/(?P[^/]+)" - @defer.inlineCallbacks - def on_GET(self, origin, content, query, group_id, category_id): + async def on_GET(self, origin, content, query, group_id, category_id): requester_user_id = parse_string_from_args(query, "requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, "requester_user_id doesn't match origin") - resp = yield self.handler.get_group_category( + resp = await self.handler.get_group_category( group_id, requester_user_id, category_id ) - defer.returnValue((200, resp)) + 
return 200, resp - @defer.inlineCallbacks - def on_POST(self, origin, content, query, group_id, category_id): + async def on_POST(self, origin, content, query, group_id, category_id): requester_user_id = parse_string_from_args(query, "requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, "requester_user_id doesn't match origin") @@ -1147,14 +1103,13 @@ class FederationGroupsCategoryServlet(BaseFederationServlet): if category_id == "": raise SynapseError(400, "category_id cannot be empty string") - resp = yield self.handler.upsert_group_category( + resp = await self.handler.upsert_group_category( group_id, requester_user_id, category_id, content ) - defer.returnValue((200, resp)) + return 200, resp - @defer.inlineCallbacks - def on_DELETE(self, origin, content, query, group_id, category_id): + async def on_DELETE(self, origin, content, query, group_id, category_id): requester_user_id = parse_string_from_args(query, "requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, "requester_user_id doesn't match origin") @@ -1162,11 +1117,11 @@ class FederationGroupsCategoryServlet(BaseFederationServlet): if category_id == "": raise SynapseError(400, "category_id cannot be empty string") - resp = yield self.handler.delete_group_category( + resp = await self.handler.delete_group_category( group_id, requester_user_id, category_id ) - defer.returnValue((200, resp)) + return 200, resp class FederationGroupsRolesServlet(BaseFederationServlet): @@ -1175,15 +1130,14 @@ class FederationGroupsRolesServlet(BaseFederationServlet): PATH = "/groups/(?P[^/]*)/roles/?" 
- @defer.inlineCallbacks - def on_GET(self, origin, content, query, group_id): + async def on_GET(self, origin, content, query, group_id): requester_user_id = parse_string_from_args(query, "requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, "requester_user_id doesn't match origin") - resp = yield self.handler.get_group_roles(group_id, requester_user_id) + resp = await self.handler.get_group_roles(group_id, requester_user_id) - defer.returnValue((200, resp)) + return 200, resp class FederationGroupsRoleServlet(BaseFederationServlet): @@ -1192,18 +1146,16 @@ class FederationGroupsRoleServlet(BaseFederationServlet): PATH = "/groups/(?P[^/]*)/roles/(?P[^/]+)" - @defer.inlineCallbacks - def on_GET(self, origin, content, query, group_id, role_id): + async def on_GET(self, origin, content, query, group_id, role_id): requester_user_id = parse_string_from_args(query, "requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, "requester_user_id doesn't match origin") - resp = yield self.handler.get_group_role(group_id, requester_user_id, role_id) + resp = await self.handler.get_group_role(group_id, requester_user_id, role_id) - defer.returnValue((200, resp)) + return 200, resp - @defer.inlineCallbacks - def on_POST(self, origin, content, query, group_id, role_id): + async def on_POST(self, origin, content, query, group_id, role_id): requester_user_id = parse_string_from_args(query, "requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, "requester_user_id doesn't match origin") @@ -1211,14 +1163,13 @@ class FederationGroupsRoleServlet(BaseFederationServlet): if role_id == "": raise SynapseError(400, "role_id cannot be empty string") - resp = yield self.handler.update_group_role( + resp = await self.handler.update_group_role( group_id, requester_user_id, role_id, content ) - defer.returnValue((200, resp)) + return 200, resp - @defer.inlineCallbacks - def 
on_DELETE(self, origin, content, query, group_id, role_id): + async def on_DELETE(self, origin, content, query, group_id, role_id): requester_user_id = parse_string_from_args(query, "requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, "requester_user_id doesn't match origin") @@ -1226,11 +1177,11 @@ class FederationGroupsRoleServlet(BaseFederationServlet): if role_id == "": raise SynapseError(400, "role_id cannot be empty string") - resp = yield self.handler.delete_group_role( + resp = await self.handler.delete_group_role( group_id, requester_user_id, role_id ) - defer.returnValue((200, resp)) + return 200, resp class FederationGroupsSummaryUsersServlet(BaseFederationServlet): @@ -1247,8 +1198,7 @@ class FederationGroupsSummaryUsersServlet(BaseFederationServlet): "/users/(?P[^/]*)" ) - @defer.inlineCallbacks - def on_POST(self, origin, content, query, group_id, role_id, user_id): + async def on_POST(self, origin, content, query, group_id, role_id, user_id): requester_user_id = parse_string_from_args(query, "requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, "requester_user_id doesn't match origin") @@ -1256,7 +1206,7 @@ class FederationGroupsSummaryUsersServlet(BaseFederationServlet): if role_id == "": raise SynapseError(400, "role_id cannot be empty string") - resp = yield self.handler.update_group_summary_user( + resp = await self.handler.update_group_summary_user( group_id, requester_user_id, user_id=user_id, @@ -1264,10 +1214,9 @@ class FederationGroupsSummaryUsersServlet(BaseFederationServlet): content=content, ) - defer.returnValue((200, resp)) + return 200, resp - @defer.inlineCallbacks - def on_DELETE(self, origin, content, query, group_id, role_id, user_id): + async def on_DELETE(self, origin, content, query, group_id, role_id, user_id): requester_user_id = parse_string_from_args(query, "requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise 
SynapseError(403, "requester_user_id doesn't match origin") @@ -1275,11 +1224,11 @@ class FederationGroupsSummaryUsersServlet(BaseFederationServlet): if role_id == "": raise SynapseError(400, "role_id cannot be empty string") - resp = yield self.handler.delete_group_summary_user( + resp = await self.handler.delete_group_summary_user( group_id, requester_user_id, user_id=user_id, role_id=role_id ) - defer.returnValue((200, resp)) + return 200, resp class FederationGroupsBulkPublicisedServlet(BaseFederationServlet): @@ -1288,13 +1237,12 @@ class FederationGroupsBulkPublicisedServlet(BaseFederationServlet): PATH = "/get_groups_publicised" - @defer.inlineCallbacks - def on_POST(self, origin, content, query): - resp = yield self.handler.bulk_get_publicised_groups( + async def on_POST(self, origin, content, query): + resp = await self.handler.bulk_get_publicised_groups( content["user_ids"], proxy=False ) - defer.returnValue((200, resp)) + return 200, resp class FederationGroupsSettingJoinPolicyServlet(BaseFederationServlet): @@ -1303,17 +1251,16 @@ class FederationGroupsSettingJoinPolicyServlet(BaseFederationServlet): PATH = "/groups/(?P[^/]*)/settings/m.join_policy" - @defer.inlineCallbacks - def on_PUT(self, origin, content, query, group_id): + async def on_PUT(self, origin, content, query, group_id): requester_user_id = parse_string_from_args(query, "requester_user_id") if get_domain_from_id(requester_user_id) != origin: raise SynapseError(403, "requester_user_id doesn't match origin") - new_content = yield self.handler.set_group_join_policy( + new_content = await self.handler.set_group_join_policy( group_id, requester_user_id, content ) - defer.returnValue((200, new_content)) + return 200, new_content class RoomComplexityServlet(BaseFederationServlet): @@ -1325,18 +1272,17 @@ class RoomComplexityServlet(BaseFederationServlet): PATH = "/rooms/(?P[^/]*)/complexity" PREFIX = FEDERATION_UNSTABLE_PREFIX - @defer.inlineCallbacks - def on_GET(self, origin, content, query, 
room_id): + async def on_GET(self, origin, content, query, room_id): store = self.handler.hs.get_datastore() - is_public = yield store.is_room_world_readable_or_publicly_joinable(room_id) + is_public = await store.is_room_world_readable_or_publicly_joinable(room_id) if not is_public: raise SynapseError(404, "Room not found", errcode=Codes.INVALID_PARAM) - complexity = yield store.get_room_complexity(room_id) - defer.returnValue((200, complexity)) + complexity = await store.get_room_complexity(room_id) + return 200, complexity FEDERATION_SERVLET_CLASSES = ( From 6de09e07a6f7d043589205c25594852900a6626b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 Jul 2019 15:33:37 +0100 Subject: [PATCH 58/80] Add membership column to current_state_events table. It turns out that doing a join is surprisingly expensive for the DB to do when room_membership table is larger than the disk cache. --- synapse/storage/events.py | 26 ++++++++++--------- synapse/storage/prepare_database.py | 2 +- synapse/storage/roommember.py | 6 ++--- .../56/current_state_events_membership.sql | 19 ++++++++++++++ synapse/storage/user_directory.py | 8 +++--- 5 files changed, 41 insertions(+), 20 deletions(-) create mode 100644 synapse/storage/schema/delta/56/current_state_events_membership.sql diff --git a/synapse/storage/events.py b/synapse/storage/events.py index b486ca50eb..b70457bfc6 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -918,8 +918,6 @@ class EventsStore( min_stream_order = events_and_contexts[0][0].internal_metadata.stream_ordering max_stream_order = events_and_contexts[-1][0].internal_metadata.stream_ordering - self._update_current_state_txn(txn, state_delta_for_room, min_stream_order) - self._update_forward_extremities_txn( txn, new_forward_extremities=new_forward_extremeties, @@ -993,6 +991,10 @@ class EventsStore( backfilled=backfilled, ) + # We call this last as it assumes we've inserted the events into + # room_memberships, where applicable. 
+ self._update_current_state_txn(txn, state_delta_for_room, min_stream_order) + def _update_current_state_txn(self, txn, state_delta_by_room, stream_id): for room_id, current_state_tuple in iteritems(state_delta_by_room): to_delete, to_insert = current_state_tuple @@ -1062,16 +1064,16 @@ class EventsStore( ), ) - self._simple_insert_many_txn( - txn, - table="current_state_events", - values=[ - { - "event_id": ev_id, - "room_id": room_id, - "type": key[0], - "state_key": key[1], - } + # We include the membership in the current state table, hence we do + # a lookup when we insert. This assumes that all events have already + # been inserted into room_memberships. + txn.executemany( + """INSERT INTO current_state_events + (room_id, type, state_key, event_id, membership) + VALUES (?, ?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?)) + """, + [ + (room_id, key[0], key[1], ev_id, ev_id) for key, ev_id in iteritems(to_insert) ], ) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 7c4e1dc7ec..d20eacda59 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -27,7 +27,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 55 +SCHEMA_VERSION = 56 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 32cfd010a5..4946afe635 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -224,7 +224,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): results = [] if membership_list: where_clause = "user_id = ? AND (%s) AND forgotten = 0" % ( - " OR ".join(["membership = ?" for _ in membership_list]), + " OR ".join(["m.membership = ?" 
for _ in membership_list]), ) args = [user_id] @@ -453,8 +453,8 @@ class RoomMemberWorkerStore(EventsWorkerStore): sql = """ SELECT state_key FROM current_state_events AS c - INNER JOIN room_memberships USING (event_id) - WHERE membership = 'join' + INNER JOIN room_memberships AS m USING (event_id) + WHERE m.membership = 'join' AND type = 'm.room.member' AND c.room_id = ? AND state_key LIKE ? diff --git a/synapse/storage/schema/delta/56/current_state_events_membership.sql b/synapse/storage/schema/delta/56/current_state_events_membership.sql new file mode 100644 index 0000000000..5c754651cb --- /dev/null +++ b/synapse/storage/schema/delta/56/current_state_events_membership.sql @@ -0,0 +1,19 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- We add membership to current state so that we don't need to join against +-- room_memberships, which can be surprisingly costly (we do such queries +-- very frequently). 
+ALTER TABLE current_state_events ADD membership TEXT; diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 83466e25d9..7fd16fe65e 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -618,15 +618,15 @@ class UserDirectoryStore(StateDeltasStore, BackgroundUpdateStore): sql = """ SELECT room_id FROM ( SELECT c.room_id FROM current_state_events AS c - INNER JOIN room_memberships USING (event_id) + INNER JOIN room_memberships AS m USING (event_id) WHERE type = 'm.room.member' - AND membership = 'join' + AND m.membership = 'join' AND state_key = ? ) AS f1 INNER JOIN ( SELECT c.room_id FROM current_state_events AS c - INNER JOIN room_memberships USING (event_id) + INNER JOIN room_memberships AS m USING (event_id) WHERE type = 'm.room.member' - AND membership = 'join' + AND m.membership = 'join' AND state_key = ? ) f2 USING (room_id) """ From c618a5d348295b69885953cd1970fe1f339a4e9f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 Jul 2019 15:50:08 +0100 Subject: [PATCH 59/80] Add background update for current_state_events.membership column --- synapse/storage/roommember.py | 51 +++++++++++++++++++ .../56/current_state_events_membership.sql | 3 ++ 2 files changed, 54 insertions(+) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 4946afe635..275fef1f66 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -53,6 +53,7 @@ ProfileInfo = namedtuple("ProfileInfo", ("avatar_url", "display_name")) MemberSummary = namedtuple("MemberSummary", ("members", "count")) _MEMBERSHIP_PROFILE_UPDATE_NAME = "room_membership_profile_update" +_CURRENT_STATE_MEMBERSHIP_UPDATE_NAME = "current_state_events_membership" class RoomMemberWorkerStore(EventsWorkerStore): @@ -602,6 +603,10 @@ class RoomMemberStore(RoomMemberWorkerStore): self.register_background_update_handler( _MEMBERSHIP_PROFILE_UPDATE_NAME, self._background_add_membership_profile ) + 
self.register_background_update_handler( + _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME, + self._background_current_state_membership, + ) def _store_room_members_txn(self, txn, events, backfilled): """Store a room member in the database. @@ -781,6 +786,52 @@ class RoomMemberStore(RoomMemberWorkerStore): defer.returnValue(result) + @defer.inlineCallbacks + def _background_current_state_membership(self, progress, batch_size): + """Update the new membership column on current_state_events. + """ + + if "rooms" not in progress: + rooms = yield self._simple_select_onecol( + table="current_state_events", + keyvalues={}, + retcol="DISTINCT room_id", + desc="_background_current_state_membership_get_rooms", + ) + progress["rooms"] = rooms + + rooms = progress["rooms"] + + def _background_current_state_membership_txn(txn): + processed = 0 + while rooms and processed < batch_size: + sql = """ + UPDATE current_state_events AS c + SET membership = ( + SELECT membership FROM room_memberships + WHERE event_id = c.event_id + ) + WHERE room_id = ? 
+ """ + txn.execute(sql, (rooms.pop(),)) + processed += txn.rowcount + + self._background_update_progress_txn( + txn, _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME, progress + ) + + return processed + + result = yield self.runInteraction( + "_background_current_state_membership_update", + _background_current_state_membership_txn, + ) + + if not rooms: + yield self._end_background_update(_CURRENT_STATE_MEMBERSHIP_UPDATE_NAME) + + defer.returnValue(result) + class _JoinedHostsCache(object): """Cache for joined hosts in a room that is optimised to handle updates diff --git a/synapse/storage/schema/delta/56/current_state_events_membership.sql b/synapse/storage/schema/delta/56/current_state_events_membership.sql index 5c754651cb..ec7ad5bae2 100644 --- a/synapse/storage/schema/delta/56/current_state_events_membership.sql +++ b/synapse/storage/schema/delta/56/current_state_events_membership.sql @@ -17,3 +17,6 @@ -- room_memberships, which can be surprisingly costly (we do such queries -- very frequently). 
ALTER TABLE current_state_events ADD membership TEXT; + +INSERT INTO background_updates (update_name, progress_json) VALUES + ('current_state_events_membership', '{}'); From 059d8c1a4e720f9a0a179f7109f38302885bc9a4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 Jul 2019 16:09:14 +0100 Subject: [PATCH 60/80] Track if current_state_events.membership is up to date --- synapse/storage/roommember.py | 45 +++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 275fef1f66..f913abf8d6 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -24,6 +24,8 @@ from canonicaljson import json from twisted.internet import defer from synapse.api.constants import EventTypes, Membership +from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.storage._base import LoggingTransaction from synapse.storage.events_worker import EventsWorkerStore from synapse.types import get_domain_from_id from synapse.util.async_helpers import Linearizer @@ -57,6 +59,49 @@ _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME = "current_state_events_membership" class RoomMemberWorkerStore(EventsWorkerStore): + def __init__(self, db_conn, hs): + super(RoomMemberWorkerStore, self).__init__(db_conn, hs) + + # Is the current_state_events.membership up to date? Or is the + # background update still running? 
+ self._current_state_events_membership_up_to_date = False + + txn = LoggingTransaction( + db_conn.cursor(), + name="_check_safe_current_state_events_membership_updated", + database_engine=self.database_engine, + after_callbacks=[], + exception_callbacks=[], + ) + self._check_safe_current_state_events_membership_updated_txn(txn) + txn.close() + + def _check_safe_current_state_events_membership_updated_txn(self, txn): + """Checks if it is safe to assume the new current_state_events + membership column is up to date + """ + + pending_update = self._simple_select_one_txn( + txn, + table="background_updates", + keyvalues={"update_name": _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME}, + retcols=["update_name"], + allow_none=True, + ) + + self._current_state_events_membership_up_to_date = not pending_update + + # If the update is still running, reschedule to run. + if pending_update: + self._clock.call_later( + 15.0, + run_as_background_process, + "_check_safe_current_state_events_membership_updated", + self.runInteraction, + "_check_safe_current_state_events_membership_updated", + self._check_safe_current_state_events_membership_updated_txn, + ) + @cachedInlineCallbacks(max_entries=100000, iterable=True, cache_context=True) def get_hosts_in_room(self, room_id, cache_context): """Returns the set of all hosts currently in the room From 8e1ada9e6fdebe0cedaf39794a326196a9bd90d0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 Jul 2019 16:17:17 +0100 Subject: [PATCH 61/80] Use the current_state_events.membership column --- synapse/storage/roommember.py | 54 ++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index f913abf8d6..6541da3b8a 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -115,14 +115,23 @@ class RoomMemberWorkerStore(EventsWorkerStore): @cached(max_entries=100000, iterable=True) def get_users_in_room(self, room_id): def 
f(txn): - sql = ( - "SELECT m.user_id FROM room_memberships as m" - " INNER JOIN current_state_events as c" - " ON m.event_id = c.event_id " - " AND m.room_id = c.room_id " - " AND m.user_id = c.state_key" - " WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ?" - ) + # If we can assume current_state_events.membership is up to date + # then we can avoid a join, which is a Very Good Thing given how + # frequently this function gets called. + if self._current_state_events_membership_up_to_date: + sql = """ + SELECT state_key FROM current_state_events + WHERE type = 'm.room.member' AND room_id = ? AND membership = ? + """ + else: + sql = """ + SELECT state_key FROM room_memberships as m + INNER JOIN current_state_events as c + ON m.event_id = c.event_id + AND m.room_id = c.room_id + AND m.user_id = c.state_key + WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ? + """ txn.execute(sql, (room_id, Membership.JOIN)) return [to_ascii(r[0]) for r in txn] @@ -144,15 +153,26 @@ class RoomMemberWorkerStore(EventsWorkerStore): # first get counts. # We do this all in one transaction to keep the cache small. # FIXME: get rid of this when we have room_stats - sql = """ - SELECT count(*), m.membership FROM room_memberships as m - INNER JOIN current_state_events as c - ON m.event_id = c.event_id - AND m.room_id = c.room_id - AND m.user_id = c.state_key - WHERE c.type = 'm.room.member' AND c.room_id = ? - GROUP BY m.membership - """ + + # If we can assume current_state_events.membership is up to date + # then we can avoid a join, which is a Very Good Thing given how + # frequently this function gets called. + if self._current_state_events_membership_up_to_date: + sql = """ + SELECT count(*), membership FROM current_state_events + WHERE type = 'm.room.member' AND room_id = ? 
+ GROUP BY membership + """ + else: + sql = """ + SELECT count(*), m.membership FROM room_memberships as m + INNER JOIN current_state_events as c + ON m.event_id = c.event_id + AND m.room_id = c.room_id + AND m.user_id = c.state_key + WHERE c.type = 'm.room.member' AND c.room_id = ? + GROUP BY m.membership + """ txn.execute(sql, (room_id,)) res = {} From 89c885909aeb4591756c011f5eb339d7301591d5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 Jul 2019 16:22:26 +0100 Subject: [PATCH 62/80] Newsfile --- changelog.d/5706.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5706.misc diff --git a/changelog.d/5706.misc b/changelog.d/5706.misc new file mode 100644 index 0000000000..5e15dfd5fa --- /dev/null +++ b/changelog.d/5706.misc @@ -0,0 +1 @@ +Reduce database IO usage by optimising queries for current membership. From b2a382efdb0cf7b68e194070b616f10732fa0f36 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 18 Jul 2019 14:41:42 +0100 Subject: [PATCH 63/80] Remove the ability to query relations when the original event was redacted. (#5629) Fixes #5594 Forbid viewing relations on an event once it has been redacted. --- changelog.d/5629.bugfix | 1 + synapse/events/__init__.py | 11 ++ synapse/events/utils.py | 16 ++- synapse/rest/client/v2_alpha/relations.py | 65 ++++++----- tests/rest/client/v2_alpha/test_relations.py | 116 ++++++++++++++++++- 5 files changed, 175 insertions(+), 34 deletions(-) create mode 100644 changelog.d/5629.bugfix diff --git a/changelog.d/5629.bugfix b/changelog.d/5629.bugfix new file mode 100644 index 0000000000..672eabad40 --- /dev/null +++ b/changelog.d/5629.bugfix @@ -0,0 +1 @@ +Forbid viewing relations on an event once it has been redacted. 
diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index d3de70e671..88ed6d764f 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -104,6 +104,17 @@ class _EventInternalMetadata(object): """ return getattr(self, "proactively_send", True) + def is_redacted(self): + """Whether the event has been redacted. + + This is used for efficiently checking whether an event has been + marked as redacted without needing to make another database call. + + Returns: + bool + """ + return getattr(self, "redacted", False) + def _event_dict_property(key): # We want to be able to use hasattr with the event dict properties. diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 987de5cab7..9487a886f5 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -52,10 +52,15 @@ def prune_event(event): from . import event_type_from_format_version - return event_type_from_format_version(event.format_version)( + pruned_event = event_type_from_format_version(event.format_version)( pruned_event_dict, event.internal_metadata.get_dict() ) + # Mark the event as redacted + pruned_event.internal_metadata.redacted = True + + return pruned_event + def prune_event_dict(event_dict): """Redacts the event_dict in the same way as `prune_event`, except it @@ -360,9 +365,12 @@ class EventClientSerializer(object): event_id = event.event_id serialized_event = serialize_event(event, time_now, **kwargs) - # If MSC1849 is enabled then we need to look if thre are any relations - # we need to bundle in with the event - if self.experimental_msc1849_support_enabled and bundle_aggregations: + # If MSC1849 is enabled then we need to look if there are any relations + # we need to bundle in with the event. 
+ # Do not bundle relations if the event has been redacted + if not event.internal_metadata.is_redacted() and ( + self.experimental_msc1849_support_enabled and bundle_aggregations + ): annotations = yield self.store.get_aggregation_groups_for_event(event_id) references = yield self.store.get_relations_for_event( event_id, RelationTypes.REFERENCE, direction="f" diff --git a/synapse/rest/client/v2_alpha/relations.py b/synapse/rest/client/v2_alpha/relations.py index 7ce485b471..6e52f6d284 100644 --- a/synapse/rest/client/v2_alpha/relations.py +++ b/synapse/rest/client/v2_alpha/relations.py @@ -34,6 +34,7 @@ from synapse.http.servlet import ( from synapse.rest.client.transactions import HttpTransactionCache from synapse.storage.relations import ( AggregationPaginationToken, + PaginationChunk, RelationPaginationToken, ) @@ -153,23 +154,28 @@ class RelationPaginationServlet(RestServlet): from_token = parse_string(request, "from") to_token = parse_string(request, "to") - if from_token: - from_token = RelationPaginationToken.from_string(from_token) + if event.internal_metadata.is_redacted(): + # If the event is redacted, return an empty list of relations + pagination_chunk = PaginationChunk(chunk=[]) + else: + # Return the relations + if from_token: + from_token = RelationPaginationToken.from_string(from_token) - if to_token: - to_token = RelationPaginationToken.from_string(to_token) + if to_token: + to_token = RelationPaginationToken.from_string(to_token) - result = yield self.store.get_relations_for_event( - event_id=parent_id, - relation_type=relation_type, - event_type=event_type, - limit=limit, - from_token=from_token, - to_token=to_token, - ) + pagination_chunk = yield self.store.get_relations_for_event( + event_id=parent_id, + relation_type=relation_type, + event_type=event_type, + limit=limit, + from_token=from_token, + to_token=to_token, + ) events = yield self.store.get_events_as_list( - [c["event_id"] for c in result.chunk] + [c["event_id"] for c in 
pagination_chunk.chunk] ) now = self.clock.time_msec() @@ -186,7 +192,7 @@ class RelationPaginationServlet(RestServlet): events, now, bundle_aggregations=False ) - return_value = result.to_dict() + return_value = pagination_chunk.to_dict() return_value["chunk"] = events return_value["original_event"] = original_event @@ -234,7 +240,7 @@ class RelationAggregationPaginationServlet(RestServlet): # This checks that a) the event exists and b) the user is allowed to # view it. - yield self.event_handler.get_event(requester.user, room_id, parent_id) + event = yield self.event_handler.get_event(requester.user, room_id, parent_id) if relation_type not in (RelationTypes.ANNOTATION, None): raise SynapseError(400, "Relation type must be 'annotation'") @@ -243,21 +249,26 @@ class RelationAggregationPaginationServlet(RestServlet): from_token = parse_string(request, "from") to_token = parse_string(request, "to") - if from_token: - from_token = AggregationPaginationToken.from_string(from_token) + if event.internal_metadata.is_redacted(): + # If the event is redacted, return an empty list of relations + pagination_chunk = PaginationChunk(chunk=[]) + else: + # Return the relations + if from_token: + from_token = AggregationPaginationToken.from_string(from_token) - if to_token: - to_token = AggregationPaginationToken.from_string(to_token) + if to_token: + to_token = AggregationPaginationToken.from_string(to_token) - res = yield self.store.get_aggregation_groups_for_event( - event_id=parent_id, - event_type=event_type, - limit=limit, - from_token=from_token, - to_token=to_token, - ) + pagination_chunk = yield self.store.get_aggregation_groups_for_event( + event_id=parent_id, + event_type=event_type, + limit=limit, + from_token=from_token, + to_token=to_token, + ) - defer.returnValue((200, res.to_dict())) + defer.returnValue((200, pagination_chunk.to_dict())) class RelationAggregationGroupPaginationServlet(RestServlet): diff --git a/tests/rest/client/v2_alpha/test_relations.py 
b/tests/rest/client/v2_alpha/test_relations.py index 58c6951852..c7e5859970 100644 --- a/tests/rest/client/v2_alpha/test_relations.py +++ b/tests/rest/client/v2_alpha/test_relations.py @@ -93,7 +93,7 @@ class RelationsTestCase(unittest.HomeserverTestCase): def test_deny_double_react(self): """Test that we deny relations on membership events """ - channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a") + channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", key="a") self.assertEquals(200, channel.code, channel.json_body) channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a") @@ -540,14 +540,122 @@ class RelationsTestCase(unittest.HomeserverTestCase): {"event_id": edit_event_id, "sender": self.user_id}, m_replace_dict ) + def test_relations_redaction_redacts_edits(self): + """Test that edits of an event are redacted when the original event + is redacted. + """ + # Send a new event + res = self.helper.send(self.room, body="Heyo!", tok=self.user_token) + original_event_id = res["event_id"] + + # Add a relation + channel = self._send_relation( + RelationTypes.REPLACE, + "m.room.message", + parent_id=original_event_id, + content={ + "msgtype": "m.text", + "body": "Wibble", + "m.new_content": {"msgtype": "m.text", "body": "First edit"}, + }, + ) + self.assertEquals(200, channel.code, channel.json_body) + + # Check the relation is returned + request, channel = self.make_request( + "GET", + "/_matrix/client/unstable/rooms/%s/relations/%s/m.replace/m.room.message" + % (self.room, original_event_id), + access_token=self.user_token, + ) + self.render(request) + self.assertEquals(200, channel.code, channel.json_body) + + self.assertIn("chunk", channel.json_body) + self.assertEquals(len(channel.json_body["chunk"]), 1) + + # Redact the original event + request, channel = self.make_request( + "PUT", + "/rooms/%s/redact/%s/%s" + % (self.room, original_event_id, "test_relations_redaction_redacts_edits"), + 
access_token=self.user_token, + content="{}", + ) + self.render(request) + self.assertEquals(200, channel.code, channel.json_body) + + # Try to check for remaining m.replace relations + request, channel = self.make_request( + "GET", + "/_matrix/client/unstable/rooms/%s/relations/%s/m.replace/m.room.message" + % (self.room, original_event_id), + access_token=self.user_token, + ) + self.render(request) + self.assertEquals(200, channel.code, channel.json_body) + + # Check that no relations are returned + self.assertIn("chunk", channel.json_body) + self.assertEquals(channel.json_body["chunk"], []) + + def test_aggregations_redaction_prevents_access_to_aggregations(self): + """Test that annotations of an event are redacted when the original event + is redacted. + """ + # Send a new event + res = self.helper.send(self.room, body="Hello!", tok=self.user_token) + original_event_id = res["event_id"] + + # Add a relation + channel = self._send_relation( + RelationTypes.ANNOTATION, "m.reaction", key="👍", parent_id=original_event_id + ) + self.assertEquals(200, channel.code, channel.json_body) + + # Redact the original + request, channel = self.make_request( + "PUT", + "/rooms/%s/redact/%s/%s" + % ( + self.room, + original_event_id, + "test_aggregations_redaction_prevents_access_to_aggregations", + ), + access_token=self.user_token, + content="{}", + ) + self.render(request) + self.assertEquals(200, channel.code, channel.json_body) + + # Check that aggregations returns zero + request, channel = self.make_request( + "GET", + "/_matrix/client/unstable/rooms/%s/aggregations/%s/m.annotation/m.reaction" + % (self.room, original_event_id), + access_token=self.user_token, + ) + self.render(request) + self.assertEquals(200, channel.code, channel.json_body) + + self.assertIn("chunk", channel.json_body) + self.assertEquals(channel.json_body["chunk"], []) + def _send_relation( - self, relation_type, event_type, key=None, content={}, access_token=None + self, + relation_type, + 
event_type, + key=None, + content={}, + access_token=None, + parent_id=None, ): """Helper function to send a relation pointing at `self.parent_id` Args: relation_type (str): One of `RelationTypes` event_type (str): The type of the event to create + parent_id (str): The event_id this relation relates to. If None, then self.parent_id key (str|None): The aggregation key used for m.annotation relation type. content(dict|None): The content of the created event. @@ -564,10 +672,12 @@ class RelationsTestCase(unittest.HomeserverTestCase): if key: query = "?key=" + six.moves.urllib.parse.quote_plus(key.encode("utf-8")) + original_id = parent_id if parent_id else self.parent_id + request, channel = self.make_request( "POST", "/_matrix/client/unstable/rooms/%s/send_relation/%s/%s/%s%s" - % (self.room, self.parent_id, relation_type, event_type, query), + % (self.room, original_id, relation_type, event_type, query), json.dumps(content).encode("utf-8"), access_token=access_token, ) From 7ad1d763566eb34bd32234811aa9901d8f3668aa Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 18 Jul 2019 23:57:15 +1000 Subject: [PATCH 64/80] Support Prometheus_client 0.4.0+ (#5636) --- UPGRADE.rst | 7 + changelog.d/5636.misc | 1 + docs/metrics-howto.rst | 102 +++++++++++ synapse/app/_base.py | 3 +- synapse/app/appservice.py | 3 +- synapse/app/client_reader.py | 3 +- synapse/app/event_creator.py | 3 +- synapse/app/federation_reader.py | 3 +- synapse/app/federation_sender.py | 3 +- synapse/app/frontend_proxy.py | 3 +- synapse/app/homeserver.py | 3 +- synapse/app/media_repository.py | 3 +- synapse/app/pusher.py | 3 +- synapse/app/synchrotron.py | 3 +- synapse/app/user_dir.py | 3 +- synapse/metrics/__init__.py | 17 ++ synapse/metrics/_exposition.py | 258 ++++++++++++++++++++++++++++ synapse/metrics/resource.py | 20 --- synapse/python_dependencies.py | 4 +- tests/storage/test_event_metrics.py | 4 +- 20 files changed, 399 insertions(+), 50 deletions(-) create mode 100644 changelog.d/5636.misc 
create mode 100644 synapse/metrics/_exposition.py delete mode 100644 synapse/metrics/resource.py diff --git a/UPGRADE.rst b/UPGRADE.rst index 72064accf3..cf228c7c52 100644 --- a/UPGRADE.rst +++ b/UPGRADE.rst @@ -49,6 +49,13 @@ returned by the Client-Server API: # configured on port 443. curl -kv https://<host.name>/_matrix/client/versions 2>&1 | grep "Server:" +Upgrading to v1.2.0 +=================== + +Some counter metrics have been renamed, with the old names deprecated. See +`the metrics documentation <docs/metrics-howto.rst#renaming-of-metrics--deprecation-of-old-names-in-12>`_ +for details. + Upgrading to v1.1.0 =================== diff --git a/changelog.d/5636.misc b/changelog.d/5636.misc new file mode 100644 index 0000000000..3add990283 --- /dev/null +++ b/changelog.d/5636.misc @@ -0,0 +1 @@ +Some counter metrics exposed over Prometheus have been renamed, with the old names preserved for backwards compatibility and deprecated. See `docs/metrics-howto.rst` for details. \ No newline at end of file diff --git a/docs/metrics-howto.rst b/docs/metrics-howto.rst index 32b064e2da..973641f3dc 100644 --- a/docs/metrics-howto.rst +++ b/docs/metrics-howto.rst @@ -59,6 +59,108 @@ How to monitor Synapse metrics using Prometheus Restart Prometheus. +Renaming of metrics & deprecation of old names in 1.2 +----------------------------------------------------- + +Synapse 1.2 updates the Prometheus metrics to match the naming convention of the +upstream ``prometheus_client``. The old names are considered deprecated and will +be removed in a future version of Synapse. 
+ ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| New Name | Old Name | ++=============================================================================+=======================================================================+ +| python_gc_objects_collected_total | python_gc_objects_collected | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| python_gc_objects_uncollectable_total | python_gc_objects_uncollectable | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| python_gc_collections_total | python_gc_collections | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| process_cpu_seconds_total | process_cpu_seconds | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_federation_client_sent_transactions_total | synapse_federation_client_sent_transactions | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_federation_client_events_processed_total | synapse_federation_client_events_processed | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_event_processing_loop_count_total | synapse_event_processing_loop_count | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_event_processing_loop_room_count_total | 
synapse_event_processing_loop_room_count | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_util_metrics_block_count_total | synapse_util_metrics_block_count | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_util_metrics_block_time_seconds_total | synapse_util_metrics_block_time_seconds | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_util_metrics_block_ru_utime_seconds_total | synapse_util_metrics_block_ru_utime_seconds | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_util_metrics_block_ru_stime_seconds_total | synapse_util_metrics_block_ru_stime_seconds | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_util_metrics_block_db_txn_count_total | synapse_util_metrics_block_db_txn_count | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_util_metrics_block_db_txn_duration_seconds_total | synapse_util_metrics_block_db_txn_duration_seconds | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_util_metrics_block_db_sched_duration_seconds_total | synapse_util_metrics_block_db_sched_duration_seconds | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| 
synapse_background_process_start_count_total | synapse_background_process_start_count | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_background_process_ru_utime_seconds_total | synapse_background_process_ru_utime_seconds | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_background_process_ru_stime_seconds_total | synapse_background_process_ru_stime_seconds | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_background_process_db_txn_count_total | synapse_background_process_db_txn_count | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_background_process_db_txn_duration_seconds_total | synapse_background_process_db_txn_duration_seconds | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_background_process_db_sched_duration_seconds_total | synapse_background_process_db_sched_duration_seconds | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_storage_events_persisted_events_total | synapse_storage_events_persisted_events | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_storage_events_persisted_events_sep_total | synapse_storage_events_persisted_events_sep | 
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_storage_events_state_delta_total | synapse_storage_events_state_delta | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_storage_events_state_delta_single_event_total | synapse_storage_events_state_delta_single_event | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_storage_events_state_delta_reuse_delta_total | synapse_storage_events_state_delta_reuse_delta | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_federation_server_received_pdus_total | synapse_federation_server_received_pdus | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_federation_server_received_edus_total | synapse_federation_server_received_edus | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_handler_presence_notified_presence_total | synapse_handler_presence_notified_presence | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_handler_presence_federation_presence_out_total | synapse_handler_presence_federation_presence_out | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_handler_presence_presence_updates_total | 
synapse_handler_presence_presence_updates | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_handler_presence_timers_fired_total | synapse_handler_presence_timers_fired | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_handler_presence_federation_presence_total | synapse_handler_presence_federation_presence | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_handler_presence_bump_active_time_total | synapse_handler_presence_bump_active_time | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_federation_client_sent_edus_total | synapse_federation_client_sent_edus | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_federation_client_sent_pdu_destinations_count_total | synapse_federation_client_sent_pdu_destinations:count | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_federation_client_sent_pdu_destinations_total | synapse_federation_client_sent_pdu_destinations:total | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_handlers_appservice_events_processed_total | synapse_handlers_appservice_events_processed | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| 
synapse_notifier_notified_events_total | synapse_notifier_notified_events | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter_total | synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter_total | synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_http_httppusher_http_pushes_processed_total | synapse_http_httppusher_http_pushes_processed | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_http_httppusher_http_pushes_failed_total | synapse_http_httppusher_http_pushes_failed | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_http_httppusher_badge_updates_processed_total | synapse_http_httppusher_badge_updates_processed | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ +| synapse_http_httppusher_badge_updates_failed_total | synapse_http_httppusher_badge_updates_failed | ++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+ + + Removal of deprecated metrics & time based counters becoming histograms in 0.31.0 
--------------------------------------------------------------------------------- diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 807f320b46..540dbd9236 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -149,8 +149,7 @@ def listen_metrics(bind_addresses, port): """ Start Prometheus metrics server. """ - from synapse.metrics import RegistryProxy - from prometheus_client import start_http_server + from synapse.metrics import RegistryProxy, start_http_server for host in bind_addresses: logger.info("Starting metrics listener on %s:%d", host, port) diff --git a/synapse/app/appservice.py b/synapse/app/appservice.py index be44249ed6..e01f3e5f3b 100644 --- a/synapse/app/appservice.py +++ b/synapse/app/appservice.py @@ -27,8 +27,7 @@ from synapse.config.homeserver import HomeServerConfig from synapse.config.logger import setup_logging from synapse.http.site import SynapseSite from synapse.logging.context import LoggingContext, run_in_background -from synapse.metrics import RegistryProxy -from synapse.metrics.resource import METRICS_PREFIX, MetricsResource +from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore from synapse.replication.slave.storage.directory import DirectoryStore from synapse.replication.slave.storage.events import SlavedEventStore diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index ff11beca82..29bddc4823 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -28,8 +28,7 @@ from synapse.config.logger import setup_logging from synapse.http.server import JsonResource from synapse.http.site import SynapseSite from synapse.logging.context import LoggingContext -from synapse.metrics import RegistryProxy -from synapse.metrics.resource import METRICS_PREFIX, MetricsResource +from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy from 
synapse.replication.slave.storage._base import BaseSlavedStore from synapse.replication.slave.storage.account_data import SlavedAccountDataStore from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py index cacad25eac..042cfd04af 100644 --- a/synapse/app/event_creator.py +++ b/synapse/app/event_creator.py @@ -28,8 +28,7 @@ from synapse.config.logger import setup_logging from synapse.http.server import JsonResource from synapse.http.site import SynapseSite from synapse.logging.context import LoggingContext -from synapse.metrics import RegistryProxy -from synapse.metrics.resource import METRICS_PREFIX, MetricsResource +from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy from synapse.replication.slave.storage._base import BaseSlavedStore from synapse.replication.slave.storage.account_data import SlavedAccountDataStore from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py index 11e80dbae0..76a97f8f32 100644 --- a/synapse/app/federation_reader.py +++ b/synapse/app/federation_reader.py @@ -29,8 +29,7 @@ from synapse.config.logger import setup_logging from synapse.federation.transport.server import TransportLayerServer from synapse.http.site import SynapseSite from synapse.logging.context import LoggingContext -from synapse.metrics import RegistryProxy -from synapse.metrics.resource import METRICS_PREFIX, MetricsResource +from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy from synapse.replication.slave.storage._base import BaseSlavedStore from synapse.replication.slave.storage.account_data import SlavedAccountDataStore from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore diff --git a/synapse/app/federation_sender.py b/synapse/app/federation_sender.py index 97da7bdcbf..fec49d5092 
100644 --- a/synapse/app/federation_sender.py +++ b/synapse/app/federation_sender.py @@ -28,9 +28,8 @@ from synapse.config.logger import setup_logging from synapse.federation import send_queue from synapse.http.site import SynapseSite from synapse.logging.context import LoggingContext, run_in_background -from synapse.metrics import RegistryProxy +from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.metrics.resource import METRICS_PREFIX, MetricsResource from synapse.replication.slave.storage.deviceinbox import SlavedDeviceInboxStore from synapse.replication.slave.storage.devices import SlavedDeviceStore from synapse.replication.slave.storage.events import SlavedEventStore diff --git a/synapse/app/frontend_proxy.py b/synapse/app/frontend_proxy.py index 417a10bbd2..1f1f1df78e 100644 --- a/synapse/app/frontend_proxy.py +++ b/synapse/app/frontend_proxy.py @@ -30,8 +30,7 @@ from synapse.http.server import JsonResource from synapse.http.servlet import RestServlet, parse_json_object_from_request from synapse.http.site import SynapseSite from synapse.logging.context import LoggingContext -from synapse.metrics import RegistryProxy -from synapse.metrics.resource import METRICS_PREFIX, MetricsResource +from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy from synapse.replication.slave.storage._base import BaseSlavedStore from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore from synapse.replication.slave.storage.client_ips import SlavedClientIpStore diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 639b1429c0..0c075cb3f1 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -55,9 +55,8 @@ from synapse.http.additional_resource import AdditionalResource from synapse.http.server import RootRedirect from synapse.http.site import SynapseSite from synapse.logging.context 
import LoggingContext -from synapse.metrics import RegistryProxy +from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.metrics.resource import METRICS_PREFIX, MetricsResource from synapse.module_api import ModuleApi from synapse.python_dependencies import check_requirements from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource diff --git a/synapse/app/media_repository.py b/synapse/app/media_repository.py index f23b9b6eda..d70780e9d5 100644 --- a/synapse/app/media_repository.py +++ b/synapse/app/media_repository.py @@ -28,8 +28,7 @@ from synapse.config.homeserver import HomeServerConfig from synapse.config.logger import setup_logging from synapse.http.site import SynapseSite from synapse.logging.context import LoggingContext -from synapse.metrics import RegistryProxy -from synapse.metrics.resource import METRICS_PREFIX, MetricsResource +from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy from synapse.replication.slave.storage._base import BaseSlavedStore from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore from synapse.replication.slave.storage.client_ips import SlavedClientIpStore diff --git a/synapse/app/pusher.py b/synapse/app/pusher.py index 4f929edf86..070de7d0b0 100644 --- a/synapse/app/pusher.py +++ b/synapse/app/pusher.py @@ -27,8 +27,7 @@ from synapse.config.homeserver import HomeServerConfig from synapse.config.logger import setup_logging from synapse.http.site import SynapseSite from synapse.logging.context import LoggingContext, run_in_background -from synapse.metrics import RegistryProxy -from synapse.metrics.resource import METRICS_PREFIX, MetricsResource +from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy from synapse.replication.slave.storage._base import __func__ from synapse.replication.slave.storage.account_data import 
SlavedAccountDataStore from synapse.replication.slave.storage.events import SlavedEventStore diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index de4797fddc..315c030694 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -32,8 +32,7 @@ from synapse.handlers.presence import PresenceHandler, get_interested_parties from synapse.http.server import JsonResource from synapse.http.site import SynapseSite from synapse.logging.context import LoggingContext, run_in_background -from synapse.metrics import RegistryProxy -from synapse.metrics.resource import METRICS_PREFIX, MetricsResource +from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy from synapse.replication.slave.storage._base import BaseSlavedStore, __func__ from synapse.replication.slave.storage.account_data import SlavedAccountDataStore from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore diff --git a/synapse/app/user_dir.py b/synapse/app/user_dir.py index 1177ddd72e..03ef21bd01 100644 --- a/synapse/app/user_dir.py +++ b/synapse/app/user_dir.py @@ -29,8 +29,7 @@ from synapse.config.logger import setup_logging from synapse.http.server import JsonResource from synapse.http.site import SynapseSite from synapse.logging.context import LoggingContext, run_in_background -from synapse.metrics import RegistryProxy -from synapse.metrics.resource import METRICS_PREFIX, MetricsResource +from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy from synapse.replication.slave.storage._base import BaseSlavedStore from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore from synapse.replication.slave.storage.client_ips import SlavedClientIpStore diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index eaf0aaa86e..488280b4a6 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -29,8 +29,16 @@ from prometheus_client.core import REGISTRY, 
GaugeMetricFamily, HistogramMetricF from twisted.internet import reactor +from synapse.metrics._exposition import ( + MetricsResource, + generate_latest, + start_http_server, +) + logger = logging.getLogger(__name__) +METRICS_PREFIX = "/_synapse/metrics" + running_on_pypy = platform.python_implementation() == "PyPy" all_metrics = [] all_collectors = [] @@ -470,3 +478,12 @@ try: gc.disable() except AttributeError: pass + +__all__ = [ + "MetricsResource", + "generate_latest", + "start_http_server", + "LaterGauge", + "InFlightGauge", + "BucketCollector", +] diff --git a/synapse/metrics/_exposition.py b/synapse/metrics/_exposition.py new file mode 100644 index 0000000000..1933ecd3e3 --- /dev/null +++ b/synapse/metrics/_exposition.py @@ -0,0 +1,258 @@ +# -*- coding: utf-8 -*- +# Copyright 2015-2019 Prometheus Python Client Developers +# Copyright 2019 Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This code is based off `prometheus_client/exposition.py` from version 0.7.1. + +Due to the renaming of metrics in prometheus_client 0.4.0, this customised +vendoring of the code will emit both the old versions that Synapse dashboards +expect, and the newer "best practice" version of the up-to-date official client. 
+""" + +import math +import threading +from collections import namedtuple +from http.server import BaseHTTPRequestHandler, HTTPServer +from socketserver import ThreadingMixIn +from urllib.parse import parse_qs, urlparse + +from prometheus_client import REGISTRY + +from twisted.web.resource import Resource + +try: + from prometheus_client.samples import Sample +except ImportError: + Sample = namedtuple("Sample", ["name", "labels", "value", "timestamp", "exemplar"]) + + +CONTENT_TYPE_LATEST = str("text/plain; version=0.0.4; charset=utf-8") + + +INF = float("inf") +MINUS_INF = float("-inf") + + +def floatToGoString(d): + d = float(d) + if d == INF: + return "+Inf" + elif d == MINUS_INF: + return "-Inf" + elif math.isnan(d): + return "NaN" + else: + s = repr(d) + dot = s.find(".") + # Go switches to exponents sooner than Python. + # We only need to care about positive values for le/quantile. + if d > 0 and dot > 6: + mantissa = "{0}.{1}{2}".format(s[0], s[1:dot], s[dot + 1 :]).rstrip("0.") + return "{0}e+0{1}".format(mantissa, dot - 1) + return s + + +def sample_line(line, name): + if line.labels: + labelstr = "{{{0}}}".format( + ",".join( + [ + '{0}="{1}"'.format( + k, + v.replace("\\", r"\\").replace("\n", r"\n").replace('"', r"\""), + ) + for k, v in sorted(line.labels.items()) + ] + ) + ) + else: + labelstr = "" + timestamp = "" + if line.timestamp is not None: + # Convert to milliseconds. + timestamp = " {0:d}".format(int(float(line.timestamp) * 1000)) + return "{0}{1} {2}{3}\n".format( + name, labelstr, floatToGoString(line.value), timestamp + ) + + +def nameify_sample(sample): + """ + If we get a prometheus_client<0.4.0 sample as a tuple, transform it into a + namedtuple which has the names we expect. 
+ """ + if not isinstance(sample, Sample): + sample = Sample(*sample, None, None) + + return sample + + +def generate_latest(registry, emit_help=False): + output = [] + + for metric in registry.collect(): + + if metric.name.startswith("__unused"): + continue + + if not metric.samples: + # No samples, don't bother. + continue + + mname = metric.name + mnewname = metric.name + mtype = metric.type + + # OpenMetrics -> Prometheus + if mtype == "counter": + mnewname = mnewname + "_total" + elif mtype == "info": + mtype = "gauge" + mnewname = mnewname + "_info" + elif mtype == "stateset": + mtype = "gauge" + elif mtype == "gaugehistogram": + mtype = "histogram" + elif mtype == "unknown": + mtype = "untyped" + + # Output in the old format for compatibility. + if emit_help: + output.append( + "# HELP {0} {1}\n".format( + mname, + metric.documentation.replace("\\", r"\\").replace("\n", r"\n"), + ) + ) + output.append("# TYPE {0} {1}\n".format(mname, mtype)) + for sample in map(nameify_sample, metric.samples): + # Get rid of the OpenMetrics specific samples + for suffix in ["_created", "_gsum", "_gcount"]: + if sample.name.endswith(suffix): + break + else: + newname = sample.name.replace(mnewname, mname) + if ":" in newname and newname.endswith("_total"): + newname = newname[: -len("_total")] + output.append(sample_line(sample, newname)) + + # Get rid of the weird colon things while we're at it + if mtype == "counter": + mnewname = mnewname.replace(":total", "") + mnewname = mnewname.replace(":", "_") + + if mname == mnewname: + continue + + # Also output in the new format, if it's different. 
+ if emit_help: + output.append( + "# HELP {0} {1}\n".format( + mnewname, + metric.documentation.replace("\\", r"\\").replace("\n", r"\n"), + ) + ) + output.append("# TYPE {0} {1}\n".format(mnewname, mtype)) + for sample in map(nameify_sample, metric.samples): + # Get rid of the OpenMetrics specific samples + for suffix in ["_created", "_gsum", "_gcount"]: + if sample.name.endswith(suffix): + break + else: + output.append( + sample_line( + sample, sample.name.replace(":total", "").replace(":", "_") + ) + ) + + return "".join(output).encode("utf-8") + + +class MetricsHandler(BaseHTTPRequestHandler): + """HTTP handler that gives metrics from ``REGISTRY``.""" + + registry = REGISTRY + + def do_GET(self): + registry = self.registry + params = parse_qs(urlparse(self.path).query) + + if "help" in params: + emit_help = True + else: + emit_help = False + + try: + output = generate_latest(registry, emit_help=emit_help) + except Exception: + self.send_error(500, "error generating metric output") + raise + self.send_response(200) + self.send_header("Content-Type", CONTENT_TYPE_LATEST) + self.end_headers() + self.wfile.write(output) + + def log_message(self, format, *args): + """Log nothing.""" + + @classmethod + def factory(cls, registry): + """Returns a dynamic MetricsHandler class tied + to the passed registry. + """ + # This implementation relies on MetricsHandler.registry + # (defined above and defaulted to REGISTRY). + + # As we have unicode_literals, we need to create a str() + # object for type(). + cls_name = str(cls.__name__) + MyMetricsHandler = type(cls_name, (cls, object), {"registry": registry}) + return MyMetricsHandler + + +class _ThreadingSimpleServer(ThreadingMixIn, HTTPServer): + """Thread per request HTTP server.""" + + # Make worker threads "fire and forget". Beginning with Python 3.7 this + # prevents a memory leak because ``ThreadingMixIn`` starts to gather all + # non-daemon threads in a list in order to join on them at server close. 
+ # Enabling daemon threads virtually makes ``_ThreadingSimpleServer`` the + # same as Python 3.7's ``ThreadingHTTPServer``. + daemon_threads = True + + +def start_http_server(port, addr="", registry=REGISTRY): + """Starts an HTTP server for prometheus metrics as a daemon thread""" + CustomMetricsHandler = MetricsHandler.factory(registry) + httpd = _ThreadingSimpleServer((addr, port), CustomMetricsHandler) + t = threading.Thread(target=httpd.serve_forever) + t.daemon = True + t.start() + + +class MetricsResource(Resource): + """ + Twisted ``Resource`` that serves prometheus metrics. + """ + + isLeaf = True + + def __init__(self, registry=REGISTRY): + self.registry = registry + + def render_GET(self, request): + request.setHeader(b"Content-Type", CONTENT_TYPE_LATEST.encode("ascii")) + return generate_latest(self.registry) diff --git a/synapse/metrics/resource.py b/synapse/metrics/resource.py deleted file mode 100644 index 9789359077..0000000000 --- a/synapse/metrics/resource.py +++ /dev/null @@ -1,20 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2015, 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from prometheus_client.twisted import MetricsResource - -METRICS_PREFIX = "/_synapse/metrics" - -__all__ = ["MetricsResource", "METRICS_PREFIX"] diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index e7618057be..c6465c0386 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -65,9 +65,7 @@ REQUIREMENTS = [ "msgpack>=0.5.2", "phonenumbers>=8.2.0", "six>=1.10", - # prometheus_client 0.4.0 changed the format of counter metrics - # (cf https://github.com/matrix-org/synapse/issues/4001) - "prometheus_client>=0.0.18,<0.4.0", + "prometheus_client>=0.0.18,<0.8.0", # we use attr.s(slots), which arrived in 16.0.0 # Twisted 18.7.0 requires attrs>=17.4.0 "attrs>=17.4.0", diff --git a/tests/storage/test_event_metrics.py b/tests/storage/test_event_metrics.py index d44359ff93..f26ff57a18 100644 --- a/tests/storage/test_event_metrics.py +++ b/tests/storage/test_event_metrics.py @@ -13,9 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from prometheus_client.exposition import generate_latest - -from synapse.metrics import REGISTRY +from synapse.metrics import REGISTRY, generate_latest from synapse.types import Requester, UserID from tests.unittest import HomeserverTestCase From 82345bc09a9980de58c13a18b489403237acf4bd Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 18 Jul 2019 15:06:54 +0100 Subject: [PATCH 65/80] Clean up opentracing configuration options (#5712) Clean up config settings and dead code. This is mostly about cleaning up the config format, to bring it into line with our conventions. 
In particular: * There should be a blank line after `## Section ##' headings * There should be a blank line between each config setting * There should be a `#`-only line between a comment and the setting it describes * We don't really do the `# #` style commenting-out of whole sections if we can help it * rename `tracer_enabled` to `enabled` While we're here, do more config parsing upfront, which makes it easier to use later on. Also removes redundant code from LogContextScopeManager. Also changes the changelog fragment to a `feature` - it's exciting! --- changelog.d/5544.feature | 2 + changelog.d/5544.misc | 1 - changelog.d/5712.feature | 2 + docs/sample_config.yaml | 43 ++++++++++++------ synapse/config/tracer.py | 61 +++++++++++++++++--------- synapse/logging/opentracing.py | 42 ++++++++---------- synapse/logging/scopecontextmanager.py | 4 +- 7 files changed, 94 insertions(+), 61 deletions(-) create mode 100644 changelog.d/5544.feature delete mode 100644 changelog.d/5544.misc create mode 100644 changelog.d/5712.feature diff --git a/changelog.d/5544.feature b/changelog.d/5544.feature new file mode 100644 index 0000000000..7d3459129d --- /dev/null +++ b/changelog.d/5544.feature @@ -0,0 +1,2 @@ +Add support for opentracing. + diff --git a/changelog.d/5544.misc b/changelog.d/5544.misc deleted file mode 100644 index 81d6f74c31..0000000000 --- a/changelog.d/5544.misc +++ /dev/null @@ -1 +0,0 @@ -Added opentracing and configuration options. diff --git a/changelog.d/5712.feature b/changelog.d/5712.feature new file mode 100644 index 0000000000..7d3459129d --- /dev/null +++ b/changelog.d/5712.feature @@ -0,0 +1,2 @@ +Add support for opentracing. 
+ diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 663ff31622..5b804d16a4 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1409,17 +1409,34 @@ password_config: ## Opentracing ## -# These settings enable opentracing which implements distributed tracing -# This allows you to observe the causal chain of events across servers -# including requests, key lookups etc. across any server running -# synapse or any other other services which supports opentracing. -# (specifically those implemented with jaeger) -#opentracing: -# # Enable / disable tracer -# tracer_enabled: false -# # The list of homeservers we wish to expose our current traces to. -# # The list is a list of regexes which are matched against the -# # servername of the homeserver -# homeserver_whitelist: -# - ".*" +# These settings enable opentracing, which implements distributed tracing. +# This allows you to observe the causal chains of events across servers +# including requests, key lookups etc., across any server running +# synapse or any other services which support opentracing +# (specifically those implemented with Jaeger). +# +opentracing: + # tracing is disabled by default. Uncomment the following line to enable it. + # + #enabled: true + + # The list of homeservers we wish to send and receive span contexts and span baggage. + # + # Though it's mostly safe to send and receive span contexts to and from + # untrusted users since span contexts are usually opaque ids it can lead to + # two problems, namely: + # - If the span context is marked as sampled by the sending homeserver the receiver will + # sample it. Therefore two homeservers with wildly disparate sampling policies + # could incur higher sampling counts than intended. + # - Span baggage can be arbitrary data. For safety this has been disabled in synapse + # but that doesn't prevent another server sending you baggage which will be logged + # to opentracing logs.
+ # + # This is a list of regexes which are matched against the server_name of the + # homeserver. + # + # By default, it is empty, so no servers are matched. + # + #homeserver_whitelist: + # - ".*" diff --git a/synapse/config/tracer.py b/synapse/config/tracer.py index 63a637984a..a2ce9ab3f6 100644 --- a/synapse/config/tracer.py +++ b/synapse/config/tracer.py @@ -18,33 +18,52 @@ from ._base import Config, ConfigError class TracerConfig(Config): def read_config(self, config, **kwargs): - self.tracer_config = config.get("opentracing") + opentracing_config = config.get("opentracing") + if opentracing_config is None: + opentracing_config = {} - self.tracer_config = config.get("opentracing", {"tracer_enabled": False}) + self.opentracer_enabled = opentracing_config.get("enabled", False) + if not self.opentracer_enabled: + return - if self.tracer_config.get("tracer_enabled", False): - # The tracer is enabled so sanitize the config - # If no whitelists are given - self.tracer_config.setdefault("homeserver_whitelist", []) + # The tracer is enabled so sanitize the config - if not isinstance(self.tracer_config.get("homeserver_whitelist"), list): - raise ConfigError("Tracer homesererver_whitelist config is malformed") + self.opentracer_whitelist = opentracing_config.get("homeserver_whitelist", []) + if not isinstance(self.opentracer_whitelist, list): + raise ConfigError("Tracer homeserver_whitelist config is malformed") def generate_config_section(cls, **kwargs): return """\ ## Opentracing ## - # These settings enable opentracing which implements distributed tracing - # This allows you to observe the causal chain of events across servers - # including requests, key lookups etc. across any server running - # synapse or any other other services which supports opentracing. - # (specifically those implemented with jaeger) - #opentracing: - # # Enable / disable tracer - # tracer_enabled: false - # # The list of homeservers we wish to expose our current traces to.
- # # The list is a list of regexes which are matched against the - # # servername of the homeserver - # homeserver_whitelist: - # - ".*" + # These settings enable opentracing, which implements distributed tracing. + # This allows you to observe the causal chains of events across servers + # including requests, key lookups etc., across any server running + # synapse or any other services which support opentracing + # (specifically those implemented with Jaeger). + # + opentracing: + # tracing is disabled by default. Uncomment the following line to enable it. + # + #enabled: true + + # The list of homeservers we wish to send and receive span contexts and span baggage. + # + # Though it's mostly safe to send and receive span contexts to and from + # untrusted users since span contexts are usually opaque ids it can lead to + # two problems, namely: + # - If the span context is marked as sampled by the sending homeserver the receiver will + # sample it. Therefore two homeservers with wildly disparate sampling policies + # could incur higher sampling counts than intended. + # - Span baggage can be arbitrary data. For safety this has been disabled in synapse + # but that doesn't prevent another server sending you baggage which will be logged + # to opentracing logs. + # + # This is a list of regexes which are matched against the server_name of the + # homeserver. + # + # By default, it is empty, so no servers are matched. + # + #homeserver_whitelist: + # - ".*" """ diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index f0ceea2a64..415040f5ee 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2019 The Matrix.org Foundation C.I.C.d +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.
@@ -24,6 +24,15 @@ # this move the methods have work very similarly to opentracing's and it should only # be a matter of few regexes to move over to opentracing's access patterns proper. +import contextlib +import logging +import re +from functools import wraps + +from twisted.internet import defer + +from synapse.config import ConfigError + try: import opentracing except ImportError: @@ -35,12 +44,6 @@ except ImportError: JaegerConfig = None LogContextScopeManager = None -import contextlib -import logging -import re -from functools import wraps - -from twisted.internet import defer logger = logging.getLogger(__name__) @@ -91,7 +94,8 @@ def only_if_tracing(func): return _only_if_tracing_inner -# Block everything by default +# A regex which matches the server_names to expose traces for. +# None means 'block everything'. _homeserver_whitelist = None tags = _DumTagNames @@ -101,31 +105,24 @@ def init_tracer(config): """Set the whitelists and initialise the JaegerClient tracer Args: - config (Config) - The config used by the homeserver. Here it's used to set the service - name to the homeserver's. + config (HomeserverConfig): The config used by the homeserver """ global opentracing - if not config.tracer_config.get("tracer_enabled", False): + if not config.opentracer_enabled: # We don't have a tracer opentracing = None return - if not opentracing: - logger.error( - "The server has been configure to use opentracing but opentracing is not installed." - ) - raise ModuleNotFoundError("opentracing") - - if not JaegerConfig: - logger.error( - "The server has been configure to use opentracing but opentracing is not installed." + if not opentracing or not JaegerConfig: + raise ConfigError( + "The server has been configured to use opentracing but opentracing is not " + "installed." 
) # Include the worker name name = config.worker_name if config.worker_name else "master" - set_homeserver_whitelist(config.tracer_config["homeserver_whitelist"]) + set_homeserver_whitelist(config.opentracer_whitelist) jaeger_config = JaegerConfig( config={"sampler": {"type": "const", "param": 1}, "logging": True}, service_name="{} {}".format(config.server_name, name), @@ -232,7 +229,6 @@ def whitelisted_homeserver(destination): """Checks if a destination matches the whitelist Args: destination (String)""" - global _homeserver_whitelist if _homeserver_whitelist: return _homeserver_whitelist.match(destination) return False diff --git a/synapse/logging/scopecontextmanager.py b/synapse/logging/scopecontextmanager.py index 91e14462f3..8c661302c9 100644 --- a/synapse/logging/scopecontextmanager.py +++ b/synapse/logging/scopecontextmanager.py @@ -34,9 +34,7 @@ class LogContextScopeManager(ScopeManager): """ def __init__(self, config): - # Set the whitelists - logger.info(config.tracer_config) - self._homeserver_whitelist = config.tracer_config["homeserver_whitelist"] + pass @property def active(self): From 10523241d86eeaa1fa43607a03352f9e7b04efda Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jul 2019 15:17:37 +0100 Subject: [PATCH 66/80] Delegate to cached version when using get_filtered_current_state_ids In the case where it gets called with `StateFilter.all()` --- synapse/storage/state.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 0bfe1b4550..a35289876d 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -510,6 +510,12 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): event ID. 
""" + where_clause, where_args = state_filter.make_sql_filter_clause() + + if not where_clause: + # We delegate to the cached version + return self.get_current_state_ids(room_id) + def _get_filtered_current_state_ids_txn(txn): results = {} sql = """ @@ -517,8 +523,6 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): WHERE room_id = ? """ - where_clause, where_args = state_filter.make_sql_filter_clause() - if where_clause: sql += " AND (%s)" % (where_clause,) From dd2851d576649194205725bb5105f3cbb4a87e55 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jul 2019 15:27:18 +0100 Subject: [PATCH 67/80] Newsfile --- changelog.d/5713.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5713.misc diff --git a/changelog.d/5713.misc b/changelog.d/5713.misc new file mode 100644 index 0000000000..01ea1cf8d7 --- /dev/null +++ b/changelog.d/5713.misc @@ -0,0 +1 @@ +Improve caching when fetching `get_filtered_current_state_ids`. From cfc00068bd7f7517ae5fd1d3ecd147518a0eb787 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 18 Jul 2019 15:56:59 +0100 Subject: [PATCH 68/80] enable aggregations support by default --- synapse/config/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/config/server.py b/synapse/config/server.py index 080d0630bd..00170f1393 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -136,7 +136,7 @@ class ServerConfig(Config): # Whether to enable experimental MSC1849 (aka relations) support self.experimental_msc1849_support_enabled = config.get( - "experimental_msc1849_support_enabled", False + "experimental_msc1849_support_enabled", True ) # Options to control access by tracking MAU From a3e40bd5b426c3cf852c389bbcc9fc392289c3ed Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 18 Jul 2019 16:02:09 +0100 Subject: [PATCH 69/80] towncrier --- changelog.d/5714.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5714.feature diff --git 
a/changelog.d/5714.feature b/changelog.d/5714.feature new file mode 100644 index 0000000000..2fd32e5e38 --- /dev/null +++ b/changelog.d/5714.feature @@ -0,0 +1 @@ +Enable aggregations support by default From 6a85cb5ef7f9dfe4cd58abc313d66ee270db3549 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Fri, 19 Jul 2019 01:40:08 +1000 Subject: [PATCH 70/80] Remove non-dedicated logging options and command line arguments (#5678) --- changelog.d/5678.removal | 1 + synapse/config/logger.py | 81 ++++++--------------------------------- synapse/config/workers.py | 6 --- 3 files changed, 13 insertions(+), 75 deletions(-) create mode 100644 changelog.d/5678.removal diff --git a/changelog.d/5678.removal b/changelog.d/5678.removal new file mode 100644 index 0000000000..085b84fda6 --- /dev/null +++ b/changelog.d/5678.removal @@ -0,0 +1 @@ +Synapse now no longer accepts the `-v`/`--verbose`, `-f`/`--log-file`, or `--log-config` command line flags, and removes the deprecated `verbose` and `log_file` configuration file options. Users of these options should migrate their options into the dedicated log configuration. diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 40502a5798..d321d00b80 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ import logging import logging.config import os @@ -75,10 +76,8 @@ root: class LoggingConfig(Config): def read_config(self, config, **kwargs): - self.verbosity = config.get("verbose", 0) - self.no_redirect_stdio = config.get("no_redirect_stdio", False) self.log_config = self.abspath(config.get("log_config")) - self.log_file = self.abspath(config.get("log_file")) + self.no_redirect_stdio = config.get("no_redirect_stdio", False) def generate_config_section(self, config_dir_path, server_name, **kwargs): log_config = os.path.join(config_dir_path, server_name + ".log.config") @@ -94,38 +93,12 @@ class LoggingConfig(Config): ) def read_arguments(self, args): - if args.verbose is not None: - self.verbosity = args.verbose if args.no_redirect_stdio is not None: self.no_redirect_stdio = args.no_redirect_stdio - if args.log_config is not None: - self.log_config = args.log_config - if args.log_file is not None: - self.log_file = args.log_file @staticmethod def add_arguments(parser): logging_group = parser.add_argument_group("logging") - logging_group.add_argument( - "-v", - "--verbose", - dest="verbose", - action="count", - help="The verbosity level. Specify multiple times to increase " - "verbosity. (Ignored if --log-config is specified.)", - ) - logging_group.add_argument( - "-f", - "--log-file", - dest="log_file", - help="File to log to. (Ignored if --log-config is specified.)", - ) - logging_group.add_argument( - "--log-config", - dest="log_config", - default=None, - help="Python logging config file", - ) logging_group.add_argument( "-n", "--no-redirect-stdio", @@ -153,58 +126,29 @@ def setup_logging(config, use_worker_options=False): config (LoggingConfig | synapse.config.workers.WorkerConfig): configuration data - use_worker_options (bool): True to use 'worker_log_config' and - 'worker_log_file' options instead of 'log_config' and 'log_file'. + use_worker_options (bool): True to use the 'worker_log_config' option + instead of 'log_config'. 
register_sighup (func | None): Function to call to register a sighup handler. """ log_config = config.worker_log_config if use_worker_options else config.log_config - log_file = config.worker_log_file if use_worker_options else config.log_file - - log_format = ( - "%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s" - " - %(message)s" - ) if log_config is None: - # We don't have a logfile, so fall back to the 'verbosity' param from - # the config or cmdline. (Note that we generate a log config for new - # installs, so this will be an unusual case) - level = logging.INFO - level_for_storage = logging.INFO - if config.verbosity: - level = logging.DEBUG - if config.verbosity > 1: - level_for_storage = logging.DEBUG + log_format = ( + "%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s" + " - %(message)s" + ) logger = logging.getLogger("") - logger.setLevel(level) - - logging.getLogger("synapse.storage.SQL").setLevel(level_for_storage) + logger.setLevel(logging.INFO) + logging.getLogger("synapse.storage.SQL").setLevel(logging.INFO) formatter = logging.Formatter(log_format) - if log_file: - # TODO: Customisable file size / backup count - handler = logging.handlers.RotatingFileHandler( - log_file, maxBytes=(1000 * 1000 * 100), backupCount=3, encoding="utf8" - ) - - def sighup(signum, stack): - logger.info("Closing log file due to SIGHUP") - handler.doRollover() - logger.info("Opened new log file due to SIGHUP") - - else: - handler = logging.StreamHandler() - - def sighup(*args): - pass + handler = logging.StreamHandler() handler.setFormatter(formatter) - handler.addFilter(LoggingContextFilter(request="")) - logger.addHandler(handler) else: @@ -218,8 +162,7 @@ def setup_logging(config, use_worker_options=False): logging.info("Reloaded log config from %s due to SIGHUP", log_config) load_log_config() - - appbase.register_sighup(sighup) + appbase.register_sighup(sighup) # make sure that the first thing we log is a thing we can grep backwards # 
for diff --git a/synapse/config/workers.py b/synapse/config/workers.py index 3b75471d85..246d72cd61 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -31,8 +31,6 @@ class WorkerConfig(Config): self.worker_listeners = config.get("worker_listeners", []) self.worker_daemonize = config.get("worker_daemonize") self.worker_pid_file = config.get("worker_pid_file") - self.worker_log_file = config.get("worker_log_file") - self.worker_log_config = config.get("worker_log_config") # The host used to connect to the main synapse self.worker_replication_host = config.get("worker_replication_host", None) @@ -78,9 +76,5 @@ class WorkerConfig(Config): if args.daemonize is not None: self.worker_daemonize = args.daemonize - if args.log_config is not None: - self.worker_log_config = args.log_config - if args.log_file is not None: - self.worker_log_file = args.log_file if args.manhole is not None: self.worker_manhole = args.worker_manhole From 356ed0438e3081b48a29c71042620c0c68af3c25 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Fri, 19 Jul 2019 19:01:23 +1000 Subject: [PATCH 71/80] Speed up the PostgreSQL unit tests (#5717) --- .buildkite/pipeline.yml | 12 +++++++++--- changelog.d/5717.misc | 1 + 2 files changed, 10 insertions(+), 3 deletions(-) create mode 100644 changelog.d/5717.misc diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 7f42fad909..d5e5aeec6b 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -117,8 +117,10 @@ steps: limit: 2 - label: ":python: 3.5 / :postgres: 9.5" + agents: + queue: "medium" env: - TRIAL_FLAGS: "-j 4" + TRIAL_FLAGS: "-j 8" command: - "bash -c 'python -m pip install tox && python -m tox -e py35-postgres,codecov'" plugins: @@ -134,8 +136,10 @@ steps: limit: 2 - label: ":python: 3.7 / :postgres: 9.5" + agents: + queue: "medium" env: - TRIAL_FLAGS: "-j 4" + TRIAL_FLAGS: "-j 8" command: - "bash -c 'python -m pip install tox && python -m tox -e py37-postgres,codecov'" plugins: @@ -151,8 +155,10 
@@ steps: limit: 2 - label: ":python: 3.7 / :postgres: 11" + agents: + queue: "medium" env: - TRIAL_FLAGS: "-j 4" + TRIAL_FLAGS: "-j 8" command: - "bash -c 'python -m pip install tox && python -m tox -e py37-postgres,codecov'" plugins: diff --git a/changelog.d/5717.misc b/changelog.d/5717.misc new file mode 100644 index 0000000000..07dc3bca94 --- /dev/null +++ b/changelog.d/5717.misc @@ -0,0 +1 @@ +Speed up PostgreSQL unit tests in CI. From b73ce4ba81fa059ef1a10db55e55decefe814649 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 19 Jul 2019 11:55:14 +0100 Subject: [PATCH 72/80] Update the coding style doc (#5719) A few fixes and removal of duplicated stuff, but mostly a bunch of the words on the config file. --- changelog.d/5719.misc | 1 + docs/code_style.rst | 134 +++++++++++++++++++++++++++++++----------- 2 files changed, 100 insertions(+), 35 deletions(-) create mode 100644 changelog.d/5719.misc diff --git a/changelog.d/5719.misc b/changelog.d/5719.misc new file mode 100644 index 0000000000..6d5294724c --- /dev/null +++ b/changelog.d/5719.misc @@ -0,0 +1 @@ +Update the coding style document. diff --git a/docs/code_style.rst b/docs/code_style.rst index e3ca626bfd..39ac4ebedc 100644 --- a/docs/code_style.rst +++ b/docs/code_style.rst @@ -1,4 +1,8 @@ -# Code Style +Code Style +========== + +Formatting tools +---------------- The Synapse codebase uses a number of code formatting tools in order to quickly and automatically check for formatting (and sometimes logical) errors @@ -6,20 +10,20 @@ in code. The necessary tools are detailed below. -## Formatting tools +- **black** -The Synapse codebase uses [black](https://pypi.org/project/black/) as an -opinionated code formatter, ensuring all comitted code is properly -formatted. + The Synapse codebase uses `black `_ as an + opinionated code formatter, ensuring all comitted code is properly + formatted. 
-First install ``black`` with:: + First install ``black`` with:: - pip install --upgrade black + pip install --upgrade black -Have ``black`` auto-format your code (it shouldn't change any -functionality) with:: + Have ``black`` auto-format your code (it shouldn't change any functionality) + with:: - black . --exclude="\.tox|build|env" + black . --exclude="\.tox|build|env" - **flake8** @@ -54,17 +58,16 @@ functionality is supported in your editor for a more convenient development workflow. It is not, however, recommended to run ``flake8`` on save as it takes a while and is very resource intensive. -## General rules +General rules +------------- - **Naming**: - Use camel case for class and type names - Use underscores for functions and variables. -- Use double quotes ``"foo"`` rather than single quotes ``'foo'``. - -- **Comments**: should follow the `google code style - `_. +- **Docstrings**: should follow the `google code style + `_. This is so that we can generate documentation with `sphinx `_. See the `examples @@ -73,6 +76,8 @@ takes a while and is very resource intensive. - **Imports**: + - Imports should be sorted by ``isort`` as described above. + - Prefer to import classes and functions rather than packages or modules. Example:: @@ -92,25 +97,84 @@ takes a while and is very resource intensive. This goes against the advice in the Google style guide, but it means that errors in the name are caught early (at import time). - - Multiple imports from the same package can be combined onto one line:: - - from synapse.types import GroupID, RoomID, UserID - - An effort should be made to keep the individual imports in alphabetical - order. - - If the list becomes long, wrap it with parentheses and split it over - multiple lines. - - - As per `PEP-8 `_, - imports should be grouped in the following order, with a blank line between - each group: - - 1. standard library imports - 2. related third party imports - 3. 
local application/library specific imports - - - Imports within each group should be sorted alphabetically by module name. - - Avoid wildcard imports (``from synapse.types import *``) and relative imports (``from .types import UserID``). + +Configuration file format +------------------------- + +The `sample configuration file <./sample_config.yaml>`_ acts as a reference to +Synapse's configuration options for server administrators. Remember that many +readers will be unfamiliar with YAML and server administration in general, so +that it is important that the file be as easy to understand as possible, which +includes following a consistent format. + +Some guidelines follow: + +* Sections should be separated with a heading consisting of a single line + prefixed and suffixed with ``##``. There should be **two** blank lines + before the section header, and **one** after. + +* Each option should be listed in the file with the following format: + + * A comment describing the setting. Each line of this comment should be + prefixed with a hash (``#``) and a space. + + The comment should describe the default behaviour (ie, what happens if + the setting is omitted), as well as what the effect will be if the + setting is changed. + + Often, the comment ends with something like "uncomment the + following to \<do action>". + + * A line consisting of only ``#``. + + * A commented-out example setting, prefixed with only ``#``. + + For boolean (on/off) options, convention is that this example should be + the *opposite* to the default (so the comment will end with "Uncomment + the following to enable [or disable] \<feature\>." For other options, + the example should give some non-default value which is likely to be + useful to the reader. + +* There should be a blank line between each option.
+ +* Where several settings are grouped into a single dict, *avoid* the + convention where the whole block is commented out, resulting in comment + lines starting ``# #``, as this is hard to read and confusing to + edit. Instead, leave the top-level config option uncommented, and follow + the conventions above for sub-options. Ensure that your code correctly + handles the top-level option being set to ``None`` (as it will be if no + sub-options are enabled). + +* Lines should be wrapped at 80 characters. + +Example:: + + ## Frobnication ## + + # The frobnicator will ensure that all requests are fully frobnicated. + # To enable it, uncomment the following. + # + #frobnicator_enabled: true + + # By default, the frobnicator will frobnicate with the default frobber. + # The following will make it use an alternative frobber. + # + #frobnicator_frobber: special_frobber + + # Settings for the frobber + # + frobber: + # frobbing speed. Defaults to 1. + # + #speed: 10 + + # frobbing distance. Defaults to 1000. + # + #distance: 100 + +Note that the sample configuration is generated from the synapse code and is +maintained by a script, ``scripts-dev/generate_sample_config``. Making sure +that the output from this script matches the desired format is left as an +exercise for the reader! From 5c05ae7ba0c7ec97b84d55efdbc91446361bf9e1 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Fri, 19 Jul 2019 12:03:36 +0100 Subject: [PATCH 73/80] Add 'rel' attribute to default welcome page. (#5695) add rel attribute as a precaution against reverse tabnabbing in future --- changelog.d/5695.misc | 1 + synapse/static/index.html | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/5695.misc diff --git a/changelog.d/5695.misc b/changelog.d/5695.misc new file mode 100644 index 0000000000..4741d32e25 --- /dev/null +++ b/changelog.d/5695.misc @@ -0,0 +1 @@ +Add precautionary measures to prevent future abuse of `window.opener` in default welcome page.
diff --git a/synapse/static/index.html b/synapse/static/index.html index d3f1c7dce0..bf46df9097 100644 --- a/synapse/static/index.html +++ b/synapse/static/index.html @@ -48,13 +48,13 @@

It works! Synapse is running

Your Synapse server is listening on this port and is ready for messages.

-

To use this server you'll need a Matrix client. +

To use this server you'll need a Matrix client.

Welcome to the Matrix universe :)


- + matrix.org From ebc5ed1296c433e97d4dcf1c8a5fc1477506e84e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 19 Jul 2019 13:29:02 +0100 Subject: [PATCH 74/80] Update comment for new column --- .../schema/delta/56/current_state_events_membership.sql | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/storage/schema/delta/56/current_state_events_membership.sql b/synapse/storage/schema/delta/56/current_state_events_membership.sql index ec7ad5bae2..b2e08cd85d 100644 --- a/synapse/storage/schema/delta/56/current_state_events_membership.sql +++ b/synapse/storage/schema/delta/56/current_state_events_membership.sql @@ -16,6 +16,9 @@ -- We add membership to current state so that we don't need to join against -- room_memberships, which can be surprisingly costly (we do such queries -- very frequently). +-- This will be null for non-membership events and the content.membership key +-- for membership events. (Will also be null for membership events until the +-- background update job has finished). ALTER TABLE current_state_events ADD membership TEXT; INSERT INTO background_updates (update_name, progress_json) VALUES From bd2e1a2aa86b81d232f9d14d2a82a04de4b1643d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 19 Jul 2019 13:36:04 +0100 Subject: [PATCH 75/80] LoggingTransaction accepts None for callback lists. Its a bit disingenuousto give LoggingTransaction lists to append callbacks to if we're not going to run the callbacks. 
--- synapse/storage/_base.py | 18 ++++++++++++++++-- synapse/storage/event_push_actions.py | 2 -- synapse/storage/roommember.py | 2 -- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 2f940dbae6..a7c93efa46 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -86,7 +86,21 @@ _CURRENT_STATE_CACHE_NAME = "cs_cache_fake" class LoggingTransaction(object): """An object that almost-transparently proxies for the 'txn' object passed to the constructor. Adds logging and metrics to the .execute() - method.""" + method. + + Args: + txn: The database transaction object to wrap. + name (str): The name of this transaction for logging. + database_engine (Sqlite3Engine|PostgresEngine) + after_callbacks(list|None): A list that callbacks will be appended to + that have been added by `call_after` which should be run on + successful completion of the transaction. None indicates that no + callbacks should be allowed to be scheduled to run. + exception_callbacks(list|None): A list that callbacks will be appended + to that have been added by `call_on_exception` which should be run + if transaction ends with an error. None indicates that no callbacks + should be allowed to be scheduled to run. 
+ """ __slots__ = [ "txn", @@ -97,7 +111,7 @@ class LoggingTransaction(object): ] def __init__( - self, txn, name, database_engine, after_callbacks, exception_callbacks + self, txn, name, database_engine, after_callbacks=None, exception_callbacks=None ): object.__setattr__(self, "txn", txn) object.__setattr__(self, "name", name) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index eca77069fd..dcfb67e029 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -79,8 +79,6 @@ class EventPushActionsWorkerStore(SQLBaseStore): db_conn.cursor(), name="_find_stream_orderings_for_times_txn", database_engine=self.database_engine, - after_callbacks=[], - exception_callbacks=[], ) self._find_stream_orderings_for_times_txn(cur) cur.close() diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 6541da3b8a..257bcdb2f8 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -70,8 +70,6 @@ class RoomMemberWorkerStore(EventsWorkerStore): db_conn.cursor(), name="_check_safe_current_state_events_membership_updated", database_engine=self.database_engine, - after_callbacks=[], - exception_callbacks=[], ) self._check_safe_current_state_events_membership_updated_txn(txn) txn.close() From 2410335507b9fdaffb889755d76a11b0bea66f60 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 19 Jul 2019 11:34:15 +0100 Subject: [PATCH 76/80] Use upsert when updating destination retry interval --- synapse/storage/transactions.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/synapse/storage/transactions.py b/synapse/storage/transactions.py index fd18619178..c585cf6cf7 100644 --- a/synapse/storage/transactions.py +++ b/synapse/storage/transactions.py @@ -196,6 +196,26 @@ class TransactionStore(SQLBaseStore): def _set_destination_retry_timings( self, txn, destination, retry_last_ts, retry_interval ): + + if 
self.database_engine.can_native_upsert: + # Upsert retry time interval if retry_interval is zero (i.e. we're + # resetting it) or greater than the existing retry interval. + + sql = """ + INSERT INTO destinations (destination, retry_last_ts, retry_interval) + VALUES (?, ?, ?) + ON CONFLICT (destination) DO UPDATE SET + retry_last_ts = EXCLUDED.retry_last_ts, + retry_interval = EXCLUDED.retry_interval + WHERE + EXCLUDED.retry_interval = 0 + OR destinations.retry_interval < EXCLUDED.retry_interval + """ + + txn.execute(sql, (destination, retry_last_ts, retry_interval)) + + return + self.database_engine.lock_table(txn, "destinations") # We need to be careful here as the data may have changed from under us From ced4fdaa84a9addcafc87ba1af6202de90dd2685 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 19 Jul 2019 11:37:37 +0100 Subject: [PATCH 77/80] Newsfile --- changelog.d/5720.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5720.misc diff --git a/changelog.d/5720.misc b/changelog.d/5720.misc new file mode 100644 index 0000000000..590f64f19d --- /dev/null +++ b/changelog.d/5720.misc @@ -0,0 +1 @@ +Improve database query performance when recording retry intervals for remote hosts. From 7b8bc618340598623555782c34af862ace5012c3 Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Fri, 19 Jul 2019 16:29:57 +0100 Subject: [PATCH 78/80] Don't accept opentracing data from clients. (#5715) * Don't accept opentracing data from clients. * newsfile --- changelog.d/5715.misc | 1 + synapse/logging/opentracing.py | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 changelog.d/5715.misc diff --git a/changelog.d/5715.misc b/changelog.d/5715.misc new file mode 100644 index 0000000000..a77366e0c0 --- /dev/null +++ b/changelog.d/5715.misc @@ -0,0 +1 @@ +Don't accept opentracing data from clients. 
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 415040f5ee..56d900080b 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -340,8 +340,7 @@ def trace_servlet(servlet_name, func): @wraps(func) @defer.inlineCallbacks def _trace_servlet_inner(request, *args, **kwargs): - with start_active_span_from_context( - request.requestHeaders, + with start_active_span( "incoming-client-request", tags={ "request_id": request.get_request_id(), From dc7cf81267e464858c74e6215184de0c634e2b26 Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Fri, 19 Jul 2019 18:16:42 +0100 Subject: [PATCH 79/80] Remove deprecated 'verbose' cli arg --- demo/start.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/demo/start.sh b/demo/start.sh index 1c4f12d0bb..eccaa2abeb 100755 --- a/demo/start.sh +++ b/demo/start.sh @@ -29,7 +29,7 @@ for port in 8080 8081 8082; do if ! grep -F "Customisation made by demo/start.sh" -q $DIR/etc/$port.config; then printf '\n\n# Customisation made by demo/start.sh\n' >> $DIR/etc/$port.config - + echo 'enable_registration: true' >> $DIR/etc/$port.config # Warning, this heredoc depends on the interaction of tabs and spaces. 
Please don't @@ -43,7 +43,7 @@ for port in 8080 8081 8082; do tls: true resources: - names: [client, federation] - + - port: $port tls: false bind_addresses: ['::1', '127.0.0.1'] @@ -68,7 +68,7 @@ for port in 8080 8081 8082; do # Generate tls keys openssl req -x509 -newkey rsa:4096 -keyout $DIR/etc/localhost\:$https_port.tls.key -out $DIR/etc/localhost\:$https_port.tls.crt -days 365 -nodes -subj "/O=matrix" - + # Ignore keys from the trusted keys server echo '# Ignore keys from the trusted keys server' >> $DIR/etc/$port.config echo 'trusted_key_servers:' >> $DIR/etc/$port.config @@ -120,7 +120,6 @@ for port in 8080 8081 8082; do python3 -m synapse.app.homeserver \ --config-path "$DIR/etc/$port.config" \ -D \ - -vv \ popd done From f99554b15d25432e924f36ed01c8297346c2822c Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Fri, 19 Jul 2019 18:19:27 +0100 Subject: [PATCH 80/80] Revert "Remove deprecated 'verbose' cli arg" This reverts commit dc7cf81267e464858c74e6215184de0c634e2b26. --- demo/start.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/demo/start.sh b/demo/start.sh index eccaa2abeb..1c4f12d0bb 100755 --- a/demo/start.sh +++ b/demo/start.sh @@ -29,7 +29,7 @@ for port in 8080 8081 8082; do if ! grep -F "Customisation made by demo/start.sh" -q $DIR/etc/$port.config; then printf '\n\n# Customisation made by demo/start.sh\n' >> $DIR/etc/$port.config - + echo 'enable_registration: true' >> $DIR/etc/$port.config # Warning, this heredoc depends on the interaction of tabs and spaces. 
Please don't @@ -43,7 +43,7 @@ for port in 8080 8081 8082; do tls: true resources: - names: [client, federation] - + - port: $port tls: false bind_addresses: ['::1', '127.0.0.1'] @@ -68,7 +68,7 @@ for port in 8080 8081 8082; do # Generate tls keys openssl req -x509 -newkey rsa:4096 -keyout $DIR/etc/localhost\:$https_port.tls.key -out $DIR/etc/localhost\:$https_port.tls.crt -days 365 -nodes -subj "/O=matrix" - + # Ignore keys from the trusted keys server echo '# Ignore keys from the trusted keys server' >> $DIR/etc/$port.config echo 'trusted_key_servers:' >> $DIR/etc/$port.config @@ -120,6 +120,7 @@ for port in 8080 8081 8082; do python3 -m synapse.app.homeserver \ --config-path "$DIR/etc/$port.config" \ -D \ + -vv \ popd done