Remove sentinel logcontext where we log in setup, start and exit (#18870)

Remove `sentinel` logcontext where we log in `setup`, `start`, and exit.

Instead of having one giant PR that removes all places we use `sentinel`
logcontext, I've decided to tackle this more piece-meal. This PR covers
the parts if you just startup Synapse and exit it with no requests or
activity going on in between.

Part of https://github.com/element-hq/synapse/issues/18905 (Remove
`sentinel` logcontext where we log in Synapse)

Prerequisite for https://github.com/element-hq/synapse/pull/18868.
Logging with the `sentinel` logcontext means we won't know which server
the log came from.



### Why


9cc4001778/docs/log_contexts.md (L71-L81)

(docs updated in https://github.com/element-hq/synapse/pull/18900)


### Testing strategy

1. Run Synapse normally and with `daemonize: true`: `poetry run
synapse_homeserver --config-path homeserver.yaml`
 1. Execute some requests
 1. Shutdown the server
 1. Look for any bad log entries in your homeserver logs:
    - `Expected logging context sentinel but found main`
    - `Expected logging context main was lost`
    - `Expected previous context`
    - `utime went backwards!`/`stime went backwards!`
- `Called stop on logcontext POST-0 without recording a start rusage`
 1. Look for any logs coming from the `sentinel` context


With these changes, you should only see the following logs (not from
Synapse) using the `sentinel` context if you start up Synapse and exit:

`homeserver.log`
```
2025-09-10 14:45:39,924 - asyncio - 64 - DEBUG - sentinel - Using selector: EpollSelector

2025-09-10 14:45:40,562 - twisted - 281 - INFO - sentinel - Received SIGINT, shutting down.

2025-09-10 14:45:40,562 - twisted - 281 - INFO - sentinel - (TCP Port 9322 Closed)
2025-09-10 14:45:40,563 - twisted - 281 - INFO - sentinel - (TCP Port 8008 Closed)
2025-09-10 14:45:40,563 - twisted - 281 - INFO - sentinel - (TCP Port 9093 Closed)
2025-09-10 14:45:40,564 - twisted - 281 - INFO - sentinel - Main loop terminated.
```
This commit is contained in:
Eric Eastwood
2025-09-16 17:15:08 -05:00
committed by GitHub
parent 2bed3fb566
commit 84d64251dc
5 changed files with 53 additions and 33 deletions

1
changelog.d/18870.misc Normal file
View File

@@ -0,0 +1 @@
Remove `sentinel` logcontext usage where we log in `setup`, `start` and exit.

View File

@@ -72,7 +72,7 @@ from synapse.events.auto_accept_invites import InviteAutoAccepter
from synapse.events.presence_router import load_legacy_presence_router
from synapse.handlers.auth import load_legacy_password_auth_providers
from synapse.http.site import SynapseSite
from synapse.logging.context import PreserveLoggingContext
from synapse.logging.context import LoggingContext, PreserveLoggingContext
from synapse.logging.opentracing import init_tracer
from synapse.metrics import install_gc_manager, register_threadpool
from synapse.metrics.background_process_metrics import run_as_background_process
@@ -183,25 +183,23 @@ def start_reactor(
if gc_thresholds:
gc.set_threshold(*gc_thresholds)
install_gc_manager()
run_command()
# make sure that we run the reactor with the sentinel log context,
# otherwise other PreserveLoggingContext instances will get confused
# and complain when they see the logcontext arbitrarily swapping
# between the sentinel and `run` logcontexts.
#
# We also need to drop the logcontext before forking if we're daemonizing,
# otherwise the cputime metrics get confused about the per-thread resource usage
# appearing to go backwards.
with PreserveLoggingContext():
if daemonize:
assert pid_file is not None
# Reset the logging context when we start the reactor (whenever we yield control
# to the reactor, the `sentinel` logging context needs to be set so we don't
# leak the current logging context and erroneously apply it to the next task the
# reactor event loop picks up)
with PreserveLoggingContext():
run_command()
if print_pidfile:
print(pid_file)
if daemonize:
assert pid_file is not None
daemonize_process(pid_file, logger)
run()
if print_pidfile:
print(pid_file)
daemonize_process(pid_file, logger)
run()
def quit_with_error(error_string: str) -> NoReturn:
@@ -601,10 +599,12 @@ async def start(hs: "HomeServer") -> None:
hs.get_datastores().main.db_pool.start_profiling()
hs.get_pusherpool().start()
def log_shutdown() -> None:
with LoggingContext("log_shutdown"):
logger.info("Shutting down...")
# Log when we start the shut down process.
hs.get_reactor().addSystemEventTrigger(
"before", "shutdown", logger.info, "Shutting down..."
)
hs.get_reactor().addSystemEventTrigger("before", "shutdown", log_shutdown)
setup_sentry(hs)
setup_sdnotify(hs)

View File

@@ -355,7 +355,12 @@ def start(config_options: List[str]) -> None:
except Exception as e:
handle_startup_exception(e)
register_start(_base.start, hs)
async def start() -> None:
# Re-establish log context now that we're back from the reactor
with LoggingContext("start"):
await _base.start(hs)
register_start(start)
# redirect stdio to the logs, if configured.
if not hs.config.logging.no_redirect_stdio:

View File

@@ -377,15 +377,17 @@ def setup(config_options: List[str]) -> SynapseHomeServer:
handle_startup_exception(e)
async def start() -> None:
# Load the OIDC provider metadatas, if OIDC is enabled.
if hs.config.oidc.oidc_enabled:
oidc = hs.get_oidc_handler()
# Loading the provider metadata also ensures the provider config is valid.
await oidc.load_metadata()
# Re-establish log context now that we're back from the reactor
with LoggingContext("start"):
# Load the OIDC provider metadatas, if OIDC is enabled.
if hs.config.oidc.oidc_enabled:
oidc = hs.get_oidc_handler()
# Loading the provider metadata also ensures the provider config is valid.
await oidc.load_metadata()
await _base.start(hs)
await _base.start(hs)
hs.get_datastores().main.db_pool.updates.start_doing_background_updates()
hs.get_datastores().main.db_pool.updates.start_doing_background_updates()
register_start(start)

View File

@@ -29,6 +29,11 @@ import sys
from types import FrameType, TracebackType
from typing import NoReturn, Optional, Type
from synapse.logging.context import (
LoggingContext,
PreserveLoggingContext,
)
def daemonize_process(pid_file: str, logger: logging.Logger, chdir: str = "/") -> None:
"""daemonize the current process
@@ -64,8 +69,14 @@ def daemonize_process(pid_file: str, logger: logging.Logger, chdir: str = "/") -
pid_fh.write(old_pid)
sys.exit(1)
# Fork, creating a new process for the child.
process_id = os.fork()
# Stop the existing context *before* we fork the process. Otherwise the cputime
# metrics get confused about the per-thread resource usage appearing to go backwards
# because we're comparing the resource usage from the original process to the forked
# process. `PreserveLoggingContext` already takes care of restarting the original
# context *after* the block.
with PreserveLoggingContext():
# Fork, creating a new process for the child.
process_id = os.fork()
if process_id != 0:
# parent process: exit.
@@ -140,9 +151,10 @@ def daemonize_process(pid_file: str, logger: logging.Logger, chdir: str = "/") -
# Cleanup pid file at exit.
def exit() -> None:
logger.warning("Stopping daemon.")
os.remove(pid_file)
sys.exit(0)
with LoggingContext("atexit"):
logger.warning("Stopping daemon.")
os.remove(pid_file)
sys.exit(0)
atexit.register(exit)