Split homeserver creation and setup (#19015)

### Background

As part of Element's plan to support a light form of vhosting (virtual
host) (multiple instances of Synapse in the same Python process), we're
currently diving into the details and implications of running multiple
instances of Synapse in the same Python process.

"Clean tenant provisioning" tracked internally by
https://github.com/element-hq/synapse-small-hosts/issues/221


### Partial startup problem

In the context of Synapse Pro for Small Hosts, since the Twisted reactor
is already running (from the `multi_synapse` shard process itself), when
provisioning a homeserver tenant, the `reactor.callWhenRunning(...)`
callbacks will be invoked immediately. This includes the Synapse's
[`start`](0615b64bb4/synapse/app/homeserver.py (L418-L429))
callback which sets up everything (including listeners, background
tasks, etc). If we encounter an error at this point, we are partially
setup but the exception will [bubble back to
us](8be122186b/multi_synapse/app/shard.py (L114-L121))
without us having a handle to the homeserver yet so we can't call
`hs.shutdown()` and clean everything up.


### What does this PR do?

Structures Synapse so we split creating the homeserver instance from
setting everything up. This way we have access to `hs` if anything goes
wrong during setup and can subsequently `hs.shutdown()` to clean
everything up.
This commit is contained in:
Eric Eastwood
2025-10-09 13:12:10 -05:00
committed by GitHub
parent d440cfc9e2
commit 715cc5ee37
4 changed files with 74 additions and 27 deletions

1
changelog.d/19015.misc Normal file
View File

@@ -0,0 +1 @@
Split homeserver creation (`create_homeserver`) and setup (`setup`).

View File

@@ -83,6 +83,10 @@ def gz_wrap(r: Resource) -> Resource:
class SynapseHomeServer(HomeServer):
"""
Homeserver class for the main Synapse process.
"""
DATASTORE_CLASS = DataStore
def _listener_http(
@@ -345,23 +349,17 @@ def load_or_generate_config(argv_options: List[str]) -> HomeServerConfig:
return config
def setup(
def create_homeserver(
config: HomeServerConfig,
reactor: Optional[ISynapseReactor] = None,
freeze: bool = True,
) -> SynapseHomeServer:
"""
Create and setup a Synapse homeserver instance given a configuration.
Create a homeserver instance for the Synapse main process.
Args:
config: The configuration for the homeserver.
reactor: Optionally provide a reactor to use. Can be useful in different
scenarios that you want control over the reactor, such as tests.
freeze: whether to freeze the homeserver base objects in the garbage collector.
May improve garbage collection performance by marking objects with an effectively
static lifetime as frozen so they don't need to be considered for cleanup.
If you ever want to `shutdown` the homeserver, this needs to be
False otherwise the homeserver cannot be garbage collected after `shutdown`.
Returns:
A homeserver instance.
@@ -372,7 +370,6 @@ def setup(
"You have specified `worker_app` in the config but are attempting to start a non-worker "
"instance. Please use `python -m synapse.app.generic_worker` instead (or remove the option if this is the main process)."
)
sys.exit(1)
events.USE_FROZEN_DICTS = config.server.use_frozen_dicts
synapse.util.caches.TRACK_MEMORY_USAGE = config.caches.track_memory_usage
@@ -397,24 +394,50 @@ def setup(
)
hs = SynapseHomeServer(
config.server.server_name,
hostname=config.server.server_name,
config=config,
reactor=reactor,
)
setup_logging(hs, config, use_worker_options=False)
return hs
def setup(
hs: SynapseHomeServer,
*,
freeze: bool = True,
) -> None:
"""
Setup a Synapse homeserver instance given a configuration.
Args:
hs: The homeserver to setup.
freeze: whether to freeze the homeserver base objects in the garbage collector.
May improve garbage collection performance by marking objects with an effectively
static lifetime as frozen so they don't need to be considered for cleanup.
If you ever want to `shutdown` the homeserver, this needs to be
False otherwise the homeserver cannot be garbage collected after `shutdown`.
Returns:
A homeserver instance.
"""
setup_logging(hs, hs.config, use_worker_options=False)
# Log after we've configured logging.
logger.info("Setting up server")
# Start the tracer
init_tracer(hs) # noqa
logger.info("Setting up server")
try:
hs.setup()
except Exception as e:
handle_startup_exception(e)
async def start() -> None:
async def _start_when_reactor_running() -> None:
# TODO: Feels like this should be moved somewhere else.
#
# Load the OIDC provider metadatas, if OIDC is enabled.
if hs.config.oidc.oidc_enabled:
oidc = hs.get_oidc_handler()
@@ -423,21 +446,31 @@ def setup(
await _base.start(hs, freeze)
# TODO: This should be moved to `SynapseHomeServer.start_background_tasks` (not
# `HomeServer.start_background_tasks`) (this way it matches the behavior of only
# running on `main`)
hs.get_datastores().main.db_pool.updates.start_doing_background_updates()
register_start(hs, start)
return hs
# Register a callback to be invoked once the reactor is running
register_start(hs, _start_when_reactor_running)
def run(hs: HomeServer) -> None:
def start_reactor(
config: HomeServerConfig,
) -> None:
"""
Start the reactor (Twisted event-loop).
Args:
config: The configuration for the homeserver.
"""
_base.start_reactor(
"synapse-homeserver",
soft_file_limit=hs.config.server.soft_file_limit,
gc_thresholds=hs.config.server.gc_thresholds,
pid_file=hs.config.server.pid_file,
daemonize=hs.config.server.daemonize,
print_pidfile=hs.config.server.print_pidfile,
soft_file_limit=config.server.soft_file_limit,
gc_thresholds=config.server.gc_thresholds,
pid_file=config.server.pid_file,
daemonize=config.server.daemonize,
print_pidfile=config.server.print_pidfile,
logger=logger,
)
@@ -448,13 +481,14 @@ def main() -> None:
with LoggingContext(name="main", server_name=homeserver_config.server.server_name):
# check base requirements
check_requirements()
hs = setup(homeserver_config)
hs = create_homeserver(homeserver_config)
setup(hs)
# redirect stdio to the logs, if configured.
if not hs.config.logging.no_redirect_stdio:
redirect_stdio_to_logs()
run(hs)
start_reactor(homeserver_config)
if __name__ == "__main__":

View File

@@ -37,7 +37,13 @@ class HomeserverAppStartTestCase(ConfigFileTestCase):
self.add_lines_to_config([" main:", " host: 127.0.0.1", " port: 1234"])
# Ensure that starting master process with worker config raises an exception
with self.assertRaises(ConfigError):
# Do a normal homeserver creation and setup
homeserver_config = synapse.app.homeserver.load_or_generate_config(
["-c", self.config_file]
)
synapse.app.homeserver.setup(homeserver_config)
# XXX: The error will be raised at this point
hs = synapse.app.homeserver.create_homeserver(homeserver_config)
# Continue with the setup. We don't expect this to run because we raised
# earlier, but in the future, the code could be refactored to raise the
# error in a different place.
synapse.app.homeserver.setup(hs)

View File

@@ -112,7 +112,13 @@ class RegistrationConfigTestCase(ConfigFileTestCase):
# Test that allowing open registration without verification raises an error
with self.assertRaises(ConfigError):
# Do a normal homeserver creation and setup
homeserver_config = synapse.app.homeserver.load_or_generate_config(
["-c", self.config_file]
)
synapse.app.homeserver.setup(homeserver_config)
# XXX: The error will be raised at this point
hs = synapse.app.homeserver.create_homeserver(homeserver_config)
# Continue with the setup. We don't expect this to run because we raised
# earlier, but in the future, the code could be refactored to raise the
# error in a different place.
synapse.app.homeserver.setup(hs)