diff --git a/changelog.d/19324.docker b/changelog.d/19324.docker new file mode 100644 index 0000000000..52bf9cb7ae --- /dev/null +++ b/changelog.d/19324.docker @@ -0,0 +1 @@ +Add a way to expose metrics from the Docker image (`SYNAPSE_ENABLE_METRICS`). diff --git a/docker/Dockerfile b/docker/Dockerfile index 6d10dee1aa..91116ea1c4 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -188,7 +188,13 @@ COPY --from=builder --exclude=.lock /install /usr/local COPY ./docker/start.py /start.py COPY ./docker/conf /conf -EXPOSE 8008/tcp 8009/tcp 8448/tcp +# 8008: CS Matrix API port from Synapse +# 8448: SS Matrix API port from Synapse +EXPOSE 8008/tcp 8448/tcp +# 19090: Metrics listener port for the main process (metrics must be enabled with +# SYNAPSE_ENABLE_METRICS=1). Metrics for workers are on ports starting from 19091 but +# since these are dynamic we don't expose them by default. +EXPOSE 19090/tcp ENTRYPOINT ["/start.py"] diff --git a/docker/README-testing.md b/docker/README-testing.md index db88598b8c..009a5d612d 100644 --- a/docker/README-testing.md +++ b/docker/README-testing.md @@ -135,3 +135,5 @@ but it does not serve TLS by default. You can configure `SYNAPSE_TLS_CERT` and `SYNAPSE_TLS_KEY` to point to a TLS certificate and key (respectively), both in PEM (textual) format. In this case, Nginx will additionally serve using HTTPS on port 8448. + + diff --git a/docker/README.md b/docker/README.md index 3438e9c441..ed5155f541 100644 --- a/docker/README.md +++ b/docker/README.md @@ -75,6 +75,9 @@ The following environment variables are supported in `generate` mode: particularly tricky. * `SYNAPSE_LOG_TESTING`: if set, Synapse will log additional information useful for testing. +* `SYNAPSE_ENABLE_METRICS`: if set to `1`, the metrics listener will be enabled on the + main and worker processes. Defaults to `0` (disabled). The main process will listen on + port `19090` and workers on port `19091 + `. ## Postgres diff --git a/docker/conf-workers/shared.yaml.j2 b/docker/conf-workers/shared.yaml.j2 index 1dfc60ad11..6efbd05472 100644 --- a/docker/conf-workers/shared.yaml.j2 +++ b/docker/conf-workers/shared.yaml.j2 @@ -20,4 +20,9 @@ app_service_config_files: {%- endfor %} {%- endif %} +{# Controlled by SYNAPSE_ENABLE_METRICS #} +{% if enable_metrics %} +enable_metrics: true +{% endif %} + {{ shared_worker_config }} diff --git a/docker/conf-workers/worker.yaml.j2 b/docker/conf-workers/worker.yaml.j2 index 29ec74b4ea..d4d2d4d4cf 100644 --- a/docker/conf-workers/worker.yaml.j2 +++ b/docker/conf-workers/worker.yaml.j2 @@ -21,6 +21,14 @@ worker_listeners: {%- endfor %} {% endif %} +{# Controlled by SYNAPSE_ENABLE_METRICS #} +{% if metrics_port %} + - type: metrics + # Prometheus does not support Unix sockets so we don't bother with + # `SYNAPSE_USE_UNIX_SOCKET`, https://github.com/prometheus/prometheus/issues/12024 + port: {{ metrics_port }} +{% endif %} + worker_log_config: {{ worker_log_config_filepath }} {{ worker_extra_conf }} diff --git a/docker/conf/homeserver.yaml b/docker/conf/homeserver.yaml index 2890990705..e2e1338673 100644 --- a/docker/conf/homeserver.yaml +++ b/docker/conf/homeserver.yaml @@ -53,6 +53,15 @@ listeners: - names: [federation] compress: false +{% if SYNAPSE_ENABLE_METRICS %} + - type: metrics + # The main process always uses the same port 19090 + # + # Prometheus does not support Unix sockets so we don't bother with + # `SYNAPSE_USE_UNIX_SOCKET`, https://github.com/prometheus/prometheus/issues/12024 + port: 19090 +{% endif %} + ## Database ## {% if POSTGRES_PASSWORD %} diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index e7cbd701b8..c03f2a5e56 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -49,6 +49,10 @@ # regardless of the SYNAPSE_LOG_LEVEL setting. # * SYNAPSE_LOG_TESTING: if set, Synapse will log additional information useful # for testing. +# * SYNAPSE_USE_UNIX_SOCKET: TODO +# * `SYNAPSE_ENABLE_METRICS`: if set to `1`, the metrics listener will be enabled on the +# main and worker processes. Defaults to `0` (disabled). The main process will listen on +# port `19090` and workers on port `19091 + `. # # NOTE: According to Complement's ENTRYPOINT expectations for a homeserver image (as defined # in the project's README), this script may be run multiple times, and functionality should @@ -758,6 +762,9 @@ def generate_worker_files( # Convenience helper for if using unix sockets instead of host:port using_unix_sockets = environ.get("SYNAPSE_USE_UNIX_SOCKET", False) + + enable_metrics = environ.get("SYNAPSE_ENABLE_METRICS", "0") == "1" + # First read the original config file and extract the listeners block. Then we'll # add another listener for replication. Later we'll write out the result to the # shared config file. @@ -789,7 +796,11 @@ def generate_worker_files( # base shared worker jinja2 template. This config file will be passed to all # workers, included Synapse's main process. It is intended mainly for disabling # functionality when certain workers are spun up, and adding a replication listener. - shared_config: dict[str, Any] = {"listeners": listeners} + shared_config: dict[str, Any] = { + "listeners": listeners, + # Controls `enable_metrics: true` + "enable_metrics": enable_metrics, + } # List of dicts that describe workers. # We pass this to the Supervisor template later to generate the appropriate @@ -816,6 +827,8 @@ def generate_worker_files( # Start worker ports from this arbitrary port worker_port = 18009 + # The main process metrics port is 19090, so start workers from 19091 + worker_metrics_port = 19091 # A list of internal endpoints to healthcheck, starting with the main process # which exists even if no workers do. @@ -862,10 +875,15 @@ def generate_worker_files( {"name": worker_name, "port": str(worker_port), "config_path": config_path} ) - # Update the shared config with any worker_type specific options. The first of a - # given worker_type needs to stay assigned and not be replaced. - worker_config["shared_extra_conf"].update(shared_config) - shared_config = worker_config["shared_extra_conf"] + # Keep the `shared_config` up to date with the `shared_extra_conf` from each + # worker. + shared_config = { + **worker_config["shared_extra_conf"], + # We combine `shared_config` second to avoid overwriting existing keys + # because TODO: why? + **shared_config, + } + if using_unix_sockets: healthcheck_urls.append( f"--unix-socket /run/worker.{worker_port} http://localhost/health" @@ -891,6 +909,10 @@ def generate_worker_files( # Write out the worker's logging config file log_config_filepath = generate_worker_log_config(environ, worker_name, data_dir) + if enable_metrics: + # Enable prometheus metrics endpoint on this worker + worker_config["metrics_port"] = worker_metrics_port + # Then a worker config file convert( "/conf/worker.yaml.j2", @@ -905,6 +927,7 @@ def generate_worker_files( nginx_upstreams.setdefault(worker_type, set()).add(worker_port) worker_port += 1 + worker_metrics_port += 1 # Build the nginx location config blocks nginx_location_config = "" diff --git a/docker/start.py b/docker/start.py index c88d23695f..19f1ab5075 100755 --- a/docker/start.py +++ b/docker/start.py @@ -31,6 +31,25 @@ def flush_buffers() -> None: sys.stderr.flush() +def strtobool(val: str) -> bool: + """Convert a string representation of truth to True or False + + True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values + are 'n', 'no', 'f', 'false', 'off', and '0'. Raises ValueError if + 'val' is anything else. + + This is lifted from distutils.util.strtobool, with the exception that it actually + returns a bool, rather than an int. + """ + val = val.lower() + if val in ("y", "yes", "t", "true", "on", "1"): + return True + elif val in ("n", "no", "f", "false", "off", "0"): + return False + else: + raise ValueError("invalid truth value %r" % (val,)) + + def convert(src: str, dst: str, environ: Mapping[str, object]) -> None: """Generate a file from a template @@ -98,19 +117,16 @@ def generate_config_from_template( os.mkdir(config_dir) # Convert SYNAPSE_NO_TLS to boolean if exists - if "SYNAPSE_NO_TLS" in environ: - tlsanswerstring = str.lower(environ["SYNAPSE_NO_TLS"]) - if tlsanswerstring in ("true", "on", "1", "yes"): - environ["SYNAPSE_NO_TLS"] = True - else: - if tlsanswerstring in ("false", "off", "0", "no"): - environ["SYNAPSE_NO_TLS"] = False - else: - error( - 'Environment variable "SYNAPSE_NO_TLS" found but value "' - + tlsanswerstring - + '" unrecognized; exiting.' - ) + tlsanswerstring = environ.get("SYNAPSE_NO_TLS") + if tlsanswerstring is not None: + try: + environ["SYNAPSE_NO_TLS"] = strtobool(tlsanswerstring) + except ValueError: + error( + 'Environment variable "SYNAPSE_NO_TLS" found but value "' + + tlsanswerstring + + '" unrecognized; exiting.' + ) if "SYNAPSE_LOG_CONFIG" not in environ: environ["SYNAPSE_LOG_CONFIG"] = config_dir + "/log.config" @@ -164,6 +180,18 @@ def run_generate_config(environ: Mapping[str, str], ownership: str | None) -> No config_dir = environ.get("SYNAPSE_CONFIG_DIR", "/data") config_path = environ.get("SYNAPSE_CONFIG_PATH", config_dir + "/homeserver.yaml") data_dir = environ.get("SYNAPSE_DATA_DIR", "/data") + enable_metrics_raw = environ.get("SYNAPSE_ENABLE_METRICS", "0") + + enable_metrics = False + if enable_metrics_raw is not None: + try: + enable_metrics = strtobool(enable_metrics_raw) + except ValueError: + error( + 'Environment variable "SYNAPSE_ENABLE_METRICS" found but value "' + + enable_metrics_raw + + '" unrecognized; exiting.' + ) # create a suitable log config from our template log_config_file = "%s/%s.log.config" % (config_dir, server_name) @@ -190,6 +218,9 @@ def run_generate_config(environ: Mapping[str, str], ownership: str | None) -> No "--open-private-ports", ] + if enable_metrics: + args.append("--enable-metrics") + if ownership is not None: # make sure that synapse has perms to write to the data dir. log(f"Setting ownership on {data_dir} to {ownership}") diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 0d75e6d4a1..470e44a8ed 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -24,14 +24,18 @@ server_name: "SERVERNAME" pid_file: DATADIR/homeserver.pid listeners: - - port: 8008 + - bind_addresses: + - ::1 + - 127.0.0.1 + port: 8008 + resources: + - compress: false + names: + - client + - federation tls: false type: http x_forwarded: true - bind_addresses: ['::1', '127.0.0.1'] - resources: - - names: [client, federation] - compress: false database: name: sqlite3 args: diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 43dece4a08..fdfbee98a1 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -44,6 +44,7 @@ import jinja2 import yaml from synapse.types import StrSequence +from synapse.util.stringutils import parse_and_validate_server_name from synapse.util.templates import _create_mxc_to_http_filter, _format_ts_filter logger = logging.getLogger(__name__) @@ -465,6 +466,7 @@ class RootConfig: generate_secrets: bool = False, report_stats: bool | None = None, open_private_ports: bool = False, + enable_metrics: bool = False, listeners: list[dict] | None = None, tls_certificate_path: str | None = None, tls_private_key_path: str | None = None, @@ -495,9 +497,15 @@ class RootConfig: open_private_ports: True to leave private ports (such as the non-TLS HTTP listener) open to the internet. + enable_metrics: True to set `enable_metrics: true` and when using the + default set of listeners, will also add the metrics listener on port 19090. + listeners: A list of descriptions of the listeners synapse should - start with each of which specifies a port (int), a list of - resources (list(str)), tls (bool) and type (str). For example: + start with each of which specifies a port (int), a list of resources + (list(str)), tls (bool) and type (str). There is a default set of + listeners when `None`. + + Example usage: [{ "port": 8448, "resources": [{"names": ["federation"]}], @@ -518,6 +526,35 @@ class RootConfig: Returns: The yaml config file """ + _, bind_port = parse_and_validate_server_name(server_name) + if bind_port is not None: + unsecure_port = bind_port - 400 + else: + bind_port = 8448 + unsecure_port = 8008 + + # The default listeners + if listeners is None: + listeners = [ + { + "port": unsecure_port, + "tls": False, + "type": "http", + "x_forwarded": True, + "resources": [ + {"names": ["client", "federation"], "compress": False} + ], + } + ] + + if enable_metrics: + listeners.append( + { + "port": 19090, + "tls": False, + "type": "metrics", + } + ) conf = CONFIG_FILE_HEADER + "\n".join( dedent(conf) @@ -529,6 +566,7 @@ class RootConfig: generate_secrets=generate_secrets, report_stats=report_stats, open_private_ports=open_private_ports, + enable_metrics=enable_metrics, listeners=listeners, tls_certificate_path=tls_certificate_path, tls_private_key_path=tls_private_key_path, @@ -756,6 +794,14 @@ class RootConfig: " internet. Do not use this unless you know what you are doing." ), ) + generate_group.add_argument( + "--enable-metrics", + action="store_true", + help=( + "Sets `enable_metrics: true` and when using the default set of listeners, " + "will also add the metrics listener on port 19090." + ), + ) cls.invoke_all_static("add_arguments", parser) config_args = parser.parse_args(argv_options) @@ -812,6 +858,7 @@ class RootConfig: report_stats=(config_args.report_stats == "yes"), generate_secrets=True, open_private_ports=config_args.open_private_ports, + enable_metrics=config_args.enable_metrics, ) os.makedirs(config_dir_path, exist_ok=True) diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi index fe9b3333c4..7c371d161c 100644 --- a/synapse/config/_base.pyi +++ b/synapse/config/_base.pyi @@ -146,6 +146,7 @@ class RootConfig: generate_secrets: bool = ..., report_stats: bool | None = ..., open_private_ports: bool = ..., + enable_metrics: bool = ..., listeners: Any | None = ..., tls_certificate_path: str | None = ..., tls_private_key_path: str | None = ..., diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index 83dbee53b6..d8cddb82ed 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -75,10 +75,19 @@ class MetricsConfig(Config): ) def generate_config_section( - self, report_stats: bool | None = None, **kwargs: Any + self, + report_stats: bool | None = None, + enable_metrics: bool = False, + **kwargs: Any, ) -> str: if report_stats is not None: res = "report_stats: %s\n" % ("true" if report_stats else "false") else: res = "\n" + + # We avoid adding anything if it's `False` since that's the default (less noise + # in the default generated config) + if enable_metrics: + res += "enable_metrics: true\n" + return res diff --git a/synapse/config/server.py b/synapse/config/server.py index 495f289159..ca94c224ea 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -923,26 +923,21 @@ class ServerConfig(Config): def generate_config_section( self, + *, config_dir_path: str, data_dir_path: str, server_name: str, - open_private_ports: bool, - listeners: list[dict] | None, + open_private_ports: bool = False, + listeners: list[dict] | None = None, **kwargs: Any, ) -> str: - _, bind_port = parse_and_validate_server_name(server_name) - if bind_port is not None: - unsecure_port = bind_port - 400 - else: - bind_port = 8448 - unsecure_port = 8008 - pid_file = os.path.join(data_dir_path, "homeserver.pid") - secure_listeners = [] - unsecure_listeners = [] + http_bindings = "[]" private_addresses = ["::1", "127.0.0.1"] if listeners: + secure_listeners = [] + unsecure_listeners = [] for listener in listeners: if listener["tls"]: secure_listeners.append(listener) @@ -957,43 +952,17 @@ class ServerConfig(Config): unsecure_listeners.append(listener) - secure_http_bindings = indent( - yaml.dump(secure_listeners), " " * 10 + # `lstrip` is used because the first line already has whitespace in the + # template below + http_bindings = indent( + yaml.dump(secure_listeners + unsecure_listeners), " " * 10 ).lstrip() - unsecure_http_bindings = indent( - yaml.dump(unsecure_listeners), " " * 10 - ).lstrip() - - if not unsecure_listeners: - unsecure_http_bindings = """- port: %(unsecure_port)s - tls: false - type: http - x_forwarded: true""" % locals() - - if not open_private_ports: - unsecure_http_bindings += ( - "\n bind_addresses: ['::1', '127.0.0.1']" - ) - - unsecure_http_bindings += """ - - resources: - - names: [client, federation] - compress: false""" - - if listeners: - unsecure_http_bindings = "" - - if not secure_listeners: - secure_http_bindings = "" - return """\ server_name: "%(server_name)s" pid_file: %(pid_file)s listeners: - %(secure_http_bindings)s - %(unsecure_http_bindings)s + %(http_bindings)s """ % locals() def read_arguments(self, args: argparse.Namespace) -> None: diff --git a/tests/config/test_server.py b/tests/config/test_server.py index 5eb2540439..d3c59ae14c 100644 --- a/tests/config/test_server.py +++ b/tests/config/test_server.py @@ -21,6 +21,7 @@ import yaml from synapse.config._base import ConfigError, RootConfig +from synapse.config.homeserver import HomeServerConfig from synapse.config.server import ServerConfig, generate_ip_set, is_threepid_reserved from tests import unittest @@ -38,14 +39,23 @@ class ServerConfigTestCase(unittest.TestCase): self.assertFalse(is_threepid_reserved(config, user3)) self.assertFalse(is_threepid_reserved(config, user1_msisdn)) - def test_unsecure_listener_no_listeners_open_private_ports_false(self) -> None: + def test_default_set_of_listeners(self) -> None: + """ + Test that we get a default set of listeners from the `RootConfig` + """ conf = yaml.safe_load( - ServerConfig(RootConfig()).generate_config_section( - "CONFDIR", "/data_dir_path", "che.org", False, None + # We use `HomeServerConfig` instead of `RootConfig` as it has all of the + # `config_classes` defined. + HomeServerConfig().generate_config( + config_dir_path="CONFDIR", + data_dir_path="/data_dir_path", + server_name="che.org", + open_private_ports=False, + listeners=None, ) ) - expected_listeners = [ + expected_listeners: list[dict] = [ { "port": 8008, "tls": False, @@ -58,25 +68,61 @@ class ServerConfigTestCase(unittest.TestCase): self.assertEqual(conf["listeners"], expected_listeners) - def test_unsecure_listener_no_listeners_open_private_ports_true(self) -> None: + def test_default_set_of_listeners_with_enable_metrics(self) -> None: + """ + Test that the default set of listeners from the `RootConfig` gets a metrics + listener when `enable_metrics=True`. + """ conf = yaml.safe_load( - ServerConfig(RootConfig()).generate_config_section( - "CONFDIR", "/data_dir_path", "che.org", True, None + # We use `HomeServerConfig` instead of `RootConfig` as it has all of the + # `config_classes` defined. + HomeServerConfig().generate_config( + config_dir_path="CONFDIR", + data_dir_path="/data_dir_path", + server_name="che.org", + open_private_ports=False, + enable_metrics=True, + listeners=None, ) ) - expected_listeners = [ + expected_listeners: list[dict] = [ { "port": 8008, "tls": False, "type": "http", "x_forwarded": True, + "bind_addresses": ["::1", "127.0.0.1"], "resources": [{"names": ["client", "federation"], "compress": False}], - } + }, + { + "port": 19090, + "tls": False, + "type": "metrics", + "bind_addresses": ["::1", "127.0.0.1"], + }, ] self.assertEqual(conf["listeners"], expected_listeners) + def test_unsecure_listener_no_listeners(self) -> None: + conf = yaml.safe_load( + ServerConfig(RootConfig()).generate_config_section( + config_dir_path="CONFDIR", + data_dir_path="/data_dir_path", + server_name="che.org", + open_private_ports=False, + listeners=None, + ) + ) + + # We expect `None` because we only operate with what's given to us. The default + # set of listeners comes from the logic one layer above in `RootConfig` (see + # tests above). + expected_listeners: list[dict] = [] + + self.assertEqual(conf["listeners"], expected_listeners) + def test_listeners_set_correctly_open_private_ports_false(self) -> None: listeners = [ { @@ -95,7 +141,11 @@ class ServerConfigTestCase(unittest.TestCase): conf = yaml.safe_load( ServerConfig(RootConfig()).generate_config_section( - "CONFDIR", "/data_dir_path", "this.one.listens", True, listeners + config_dir_path="CONFDIR", + data_dir_path="/data_dir_path", + server_name="this.one.listens", + open_private_ports=True, + listeners=listeners, ) ) @@ -129,7 +179,11 @@ class ServerConfigTestCase(unittest.TestCase): conf = yaml.safe_load( ServerConfig(RootConfig()).generate_config_section( - "CONFDIR", "/data_dir_path", "this.one.listens", True, listeners + config_dir_path="CONFDIR", + data_dir_path="/data_dir_path", + server_name="this.one.listens", + open_private_ports=True, + listeners=listeners, ) )