1
0
Files
synapse/synapse/util/daemonize.py
Eric Eastwood 84d64251dc Remove sentinel logcontext where we log in setup, start and exit (#18870)
Remove `sentinel` logcontext where we log in `setup`, `start`, and exit.

Instead of having one giant PR that removes all places we use `sentinel`
logcontext, I've decided to tackle this more piece-meal. This PR covers
the parts if you just startup Synapse and exit it with no requests or
activity going on in between.

Part of https://github.com/element-hq/synapse/issues/18905 (Remove
`sentinel` logcontext where we log in Synapse)

Prerequisite for https://github.com/element-hq/synapse/pull/18868.
Logging with the `sentinel` logcontext means we won't know which server
the log came from.



### Why


9cc4001778/docs/log_contexts.md (L71-L81)

(docs updated in https://github.com/element-hq/synapse/pull/18900)


### Testing strategy

1. Run Synapse normally and with `daemonize: true`: `poetry run
synapse_homeserver --config-path homeserver.yaml`
 1. Execute some requests
 1. Shutdown the server
 1. Look for any bad log entries in your homeserver logs:
    - `Expected logging context sentinel but found main`
    - `Expected logging context main was lost`
    - `Expected previous context`
    - `utime went backwards!`/`stime went backwards!`
- `Called stop on logcontext POST-0 without recording a start rusage`
 1. Look for any logs coming from the `sentinel` context


With these changes, you should only see the following logs (not from
Synapse) using the `sentinel` context if you start up Synapse and exit:

`homeserver.log`
```
2025-09-10 14:45:39,924 - asyncio - 64 - DEBUG - sentinel - Using selector: EpollSelector

2025-09-10 14:45:40,562 - twisted - 281 - INFO - sentinel - Received SIGINT, shutting down.

2025-09-10 14:45:40,562 - twisted - 281 - INFO - sentinel - (TCP Port 9322 Closed)
2025-09-10 14:45:40,563 - twisted - 281 - INFO - sentinel - (TCP Port 8008 Closed)
2025-09-10 14:45:40,563 - twisted - 281 - INFO - sentinel - (TCP Port 9093 Closed)
2025-09-10 14:45:40,564 - twisted - 281 - INFO - sentinel - Main loop terminated.
```
2025-09-16 17:15:08 -05:00

162 lines
5.6 KiB
Python

#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2020 The Matrix.org Foundation C.I.C.
# Copyright (c) 2012, 2013, 2014 Ilya Otyutskiy <ilya.otyutskiy@icloud.com>
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
#
import atexit
import fcntl
import logging
import os
import signal
import sys
from types import FrameType, TracebackType
from typing import NoReturn, Optional, Type
from synapse.logging.context import (
LoggingContext,
PreserveLoggingContext,
)
def daemonize_process(pid_file: str, logger: logging.Logger, chdir: str = "/") -> None:
"""daemonize the current process
This calls fork(), and has the main process exit. When it returns we will be
running in the child process.
"""
# If pidfile already exists, we should read pid from there; to overwrite it, if
# locking will fail, because locking attempt somehow purges the file contents.
if os.path.isfile(pid_file):
with open(pid_file) as pid_fh:
old_pid = pid_fh.read()
# Create a lockfile so that only one instance of this daemon is running at any time.
try:
lock_fh = open(pid_file, "w")
except OSError:
print("Unable to create the pidfile.")
sys.exit(1)
try:
# Try to get an exclusive lock on the file. This will fail if another process
# has the file locked.
fcntl.flock(lock_fh, fcntl.LOCK_EX | fcntl.LOCK_NB)
except OSError:
print("Unable to lock on the pidfile.")
# We need to overwrite the pidfile if we got here.
#
# XXX better to avoid overwriting it, surely. this looks racey as the pid file
# could be created between us trying to read it and us trying to lock it.
with open(pid_file, "w") as pid_fh:
pid_fh.write(old_pid)
sys.exit(1)
# Stop the existing context *before* we fork the process. Otherwise the cputime
# metrics get confused about the per-thread resource usage appearing to go backwards
# because we're comparing the resource usage from the original process to the forked
# process. `PreserveLoggingContext` already takes care of restarting the original
# context *after* the block.
with PreserveLoggingContext():
# Fork, creating a new process for the child.
process_id = os.fork()
if process_id != 0:
# parent process: exit.
# we use os._exit to avoid running the atexit handlers. In particular, that
# means we don't flush the logs. This is important because if we are using
# a MemoryHandler, we could have logs buffered which are now buffered in both
# the main and the child process, so if we let the main process flush the logs,
# we'll get two copies.
os._exit(0)
# This is the child process. Continue.
# Stop listening for signals that the parent process receives.
# This is done by getting a new process id.
# setpgrp() is an alternative to setsid().
# setsid puts the process in a new parent group and detaches its controlling
# terminal.
os.setsid()
# point stdin, stdout, stderr at /dev/null
devnull = "/dev/null"
if hasattr(os, "devnull"):
# Python has set os.devnull on this system, use it instead as it might be
# different than /dev/null.
devnull = os.devnull
devnull_fd = os.open(devnull, os.O_RDWR)
os.dup2(devnull_fd, 0)
os.dup2(devnull_fd, 1)
os.dup2(devnull_fd, 2)
os.close(devnull_fd)
# now that we have redirected stderr to /dev/null, any uncaught exceptions will
# get sent to /dev/null, so make sure we log them.
#
# (we don't normally expect reactor.run to raise any exceptions, but this will
# also catch any other uncaught exceptions before we get that far.)
def excepthook(
type_: Type[BaseException],
value: BaseException,
traceback: Optional[TracebackType],
) -> None:
logger.critical("Unhanded exception", exc_info=(type_, value, traceback))
sys.excepthook = excepthook
# Set umask to default to safe file permissions when running as a root daemon. 027
# is an octal number which we are typing as 0o27 for Python3 compatibility.
os.umask(0o27)
# Change to a known directory. If this isn't done, starting a daemon in a
# subdirectory that needs to be deleted results in "directory busy" errors.
os.chdir(chdir)
try:
lock_fh.write("%s" % (os.getpid()))
lock_fh.flush()
except OSError:
logger.error("Unable to write pid to the pidfile.")
print("Unable to write pid to the pidfile.")
sys.exit(1)
# write a log line on SIGTERM.
def sigterm(signum: int, frame: Optional[FrameType]) -> NoReturn:
logger.warning("Caught signal %s. Stopping daemon.", signum)
sys.exit(0)
signal.signal(signal.SIGTERM, sigterm)
# Cleanup pid file at exit.
def exit() -> None:
with LoggingContext("atexit"):
logger.warning("Stopping daemon.")
os.remove(pid_file)
sys.exit(0)
atexit.register(exit)
logger.warning("Starting daemon.")