Remove sentinel logcontext where we log in setup, start and exit (#18870)
Remove `sentinel` logcontext where we log in `setup`, `start`, and exit.
Instead of having one giant PR that removes all places we use `sentinel`
logcontext, I've decided to tackle this more piece-meal. This PR covers
the parts if you just startup Synapse and exit it with no requests or
activity going on in between.
Part of https://github.com/element-hq/synapse/issues/18905 (Remove
`sentinel` logcontext where we log in Synapse)
Prerequisite for https://github.com/element-hq/synapse/pull/18868.
Logging with the `sentinel` logcontext means we won't know which server
the log came from.
### Why
9cc4001778/docs/log_contexts.md (L71-L81)
(docs updated in https://github.com/element-hq/synapse/pull/18900)
### Testing strategy
1. Run Synapse normally and with `daemonize: true`: `poetry run
synapse_homeserver --config-path homeserver.yaml`
1. Execute some requests
1. Shutdown the server
1. Look for any bad log entries in your homeserver logs:
- `Expected logging context sentinel but found main`
- `Expected logging context main was lost`
- `Expected previous context`
- `utime went backwards!`/`stime went backwards!`
- `Called stop on logcontext POST-0 without recording a start rusage`
1. Look for any logs coming from the `sentinel` context
With these changes, you should only see the following logs (not from
Synapse) using the `sentinel` context if you start up Synapse and exit:
`homeserver.log`
```
2025-09-10 14:45:39,924 - asyncio - 64 - DEBUG - sentinel - Using selector: EpollSelector
2025-09-10 14:45:40,562 - twisted - 281 - INFO - sentinel - Received SIGINT, shutting down.
2025-09-10 14:45:40,562 - twisted - 281 - INFO - sentinel - (TCP Port 9322 Closed)
2025-09-10 14:45:40,563 - twisted - 281 - INFO - sentinel - (TCP Port 8008 Closed)
2025-09-10 14:45:40,563 - twisted - 281 - INFO - sentinel - (TCP Port 9093 Closed)
2025-09-10 14:45:40,564 - twisted - 281 - INFO - sentinel - Main loop terminated.
```
This commit is contained in:
1
changelog.d/18870.misc
Normal file
1
changelog.d/18870.misc
Normal file
@@ -0,0 +1 @@
|
||||
Remove `sentinel` logcontext usage where we log in `setup`, `start` and exit.
|
||||
@@ -72,7 +72,7 @@ from synapse.events.auto_accept_invites import InviteAutoAccepter
|
||||
from synapse.events.presence_router import load_legacy_presence_router
|
||||
from synapse.handlers.auth import load_legacy_password_auth_providers
|
||||
from synapse.http.site import SynapseSite
|
||||
from synapse.logging.context import PreserveLoggingContext
|
||||
from synapse.logging.context import LoggingContext, PreserveLoggingContext
|
||||
from synapse.logging.opentracing import init_tracer
|
||||
from synapse.metrics import install_gc_manager, register_threadpool
|
||||
from synapse.metrics.background_process_metrics import run_as_background_process
|
||||
@@ -183,25 +183,23 @@ def start_reactor(
|
||||
if gc_thresholds:
|
||||
gc.set_threshold(*gc_thresholds)
|
||||
install_gc_manager()
|
||||
run_command()
|
||||
|
||||
# make sure that we run the reactor with the sentinel log context,
|
||||
# otherwise other PreserveLoggingContext instances will get confused
|
||||
# and complain when they see the logcontext arbitrarily swapping
|
||||
# between the sentinel and `run` logcontexts.
|
||||
#
|
||||
# We also need to drop the logcontext before forking if we're daemonizing,
|
||||
# otherwise the cputime metrics get confused about the per-thread resource usage
|
||||
# appearing to go backwards.
|
||||
with PreserveLoggingContext():
|
||||
if daemonize:
|
||||
assert pid_file is not None
|
||||
# Reset the logging context when we start the reactor (whenever we yield control
|
||||
# to the reactor, the `sentinel` logging context needs to be set so we don't
|
||||
# leak the current logging context and erroneously apply it to the next task the
|
||||
# reactor event loop picks up)
|
||||
with PreserveLoggingContext():
|
||||
run_command()
|
||||
|
||||
if print_pidfile:
|
||||
print(pid_file)
|
||||
if daemonize:
|
||||
assert pid_file is not None
|
||||
|
||||
daemonize_process(pid_file, logger)
|
||||
run()
|
||||
if print_pidfile:
|
||||
print(pid_file)
|
||||
|
||||
daemonize_process(pid_file, logger)
|
||||
|
||||
run()
|
||||
|
||||
|
||||
def quit_with_error(error_string: str) -> NoReturn:
|
||||
@@ -601,10 +599,12 @@ async def start(hs: "HomeServer") -> None:
|
||||
hs.get_datastores().main.db_pool.start_profiling()
|
||||
hs.get_pusherpool().start()
|
||||
|
||||
def log_shutdown() -> None:
|
||||
with LoggingContext("log_shutdown"):
|
||||
logger.info("Shutting down...")
|
||||
|
||||
# Log when we start the shut down process.
|
||||
hs.get_reactor().addSystemEventTrigger(
|
||||
"before", "shutdown", logger.info, "Shutting down..."
|
||||
)
|
||||
hs.get_reactor().addSystemEventTrigger("before", "shutdown", log_shutdown)
|
||||
|
||||
setup_sentry(hs)
|
||||
setup_sdnotify(hs)
|
||||
|
||||
@@ -355,7 +355,12 @@ def start(config_options: List[str]) -> None:
|
||||
except Exception as e:
|
||||
handle_startup_exception(e)
|
||||
|
||||
register_start(_base.start, hs)
|
||||
async def start() -> None:
|
||||
# Re-establish log context now that we're back from the reactor
|
||||
with LoggingContext("start"):
|
||||
await _base.start(hs)
|
||||
|
||||
register_start(start)
|
||||
|
||||
# redirect stdio to the logs, if configured.
|
||||
if not hs.config.logging.no_redirect_stdio:
|
||||
|
||||
@@ -377,15 +377,17 @@ def setup(config_options: List[str]) -> SynapseHomeServer:
|
||||
handle_startup_exception(e)
|
||||
|
||||
async def start() -> None:
|
||||
# Load the OIDC provider metadatas, if OIDC is enabled.
|
||||
if hs.config.oidc.oidc_enabled:
|
||||
oidc = hs.get_oidc_handler()
|
||||
# Loading the provider metadata also ensures the provider config is valid.
|
||||
await oidc.load_metadata()
|
||||
# Re-establish log context now that we're back from the reactor
|
||||
with LoggingContext("start"):
|
||||
# Load the OIDC provider metadatas, if OIDC is enabled.
|
||||
if hs.config.oidc.oidc_enabled:
|
||||
oidc = hs.get_oidc_handler()
|
||||
# Loading the provider metadata also ensures the provider config is valid.
|
||||
await oidc.load_metadata()
|
||||
|
||||
await _base.start(hs)
|
||||
await _base.start(hs)
|
||||
|
||||
hs.get_datastores().main.db_pool.updates.start_doing_background_updates()
|
||||
hs.get_datastores().main.db_pool.updates.start_doing_background_updates()
|
||||
|
||||
register_start(start)
|
||||
|
||||
|
||||
@@ -29,6 +29,11 @@ import sys
|
||||
from types import FrameType, TracebackType
|
||||
from typing import NoReturn, Optional, Type
|
||||
|
||||
from synapse.logging.context import (
|
||||
LoggingContext,
|
||||
PreserveLoggingContext,
|
||||
)
|
||||
|
||||
|
||||
def daemonize_process(pid_file: str, logger: logging.Logger, chdir: str = "/") -> None:
|
||||
"""daemonize the current process
|
||||
@@ -64,8 +69,14 @@ def daemonize_process(pid_file: str, logger: logging.Logger, chdir: str = "/") -
|
||||
pid_fh.write(old_pid)
|
||||
sys.exit(1)
|
||||
|
||||
# Fork, creating a new process for the child.
|
||||
process_id = os.fork()
|
||||
# Stop the existing context *before* we fork the process. Otherwise the cputime
|
||||
# metrics get confused about the per-thread resource usage appearing to go backwards
|
||||
# because we're comparing the resource usage from the original process to the forked
|
||||
# process. `PreserveLoggingContext` already takes care of restarting the original
|
||||
# context *after* the block.
|
||||
with PreserveLoggingContext():
|
||||
# Fork, creating a new process for the child.
|
||||
process_id = os.fork()
|
||||
|
||||
if process_id != 0:
|
||||
# parent process: exit.
|
||||
@@ -140,9 +151,10 @@ def daemonize_process(pid_file: str, logger: logging.Logger, chdir: str = "/") -
|
||||
|
||||
# Cleanup pid file at exit.
|
||||
def exit() -> None:
|
||||
logger.warning("Stopping daemon.")
|
||||
os.remove(pid_file)
|
||||
sys.exit(0)
|
||||
with LoggingContext("atexit"):
|
||||
logger.warning("Stopping daemon.")
|
||||
os.remove(pid_file)
|
||||
sys.exit(0)
|
||||
|
||||
atexit.register(exit)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user