diff --git a/synapse/metrics/homeserver_metrics_manager.py b/synapse/metrics/homeserver_metrics_manager.py
new file mode 100644
index 0000000000..95a8104c62
--- /dev/null
+++ b/synapse/metrics/homeserver_metrics_manager.py
@@ -0,0 +1,87 @@
+#
+# This file is licensed under the Affero General Public License (AGPL) version 3.
+#
+# Copyright (C) 2025 New Vector, Ltd
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# See the GNU Affero General Public License for more details:
+# <https://www.gnu.org/licenses/agpl-3.0.html>.
+#
+
+from prometheus_client import CollectorRegistry, Counter
+
+
+class BlockMetrics:
+    """
+    Counters recording the resources used by named blocks of code.
+
+    Every counter has a single `block_name` label and is registered with the
+    registry handed to the constructor.
+    """
+
+    def __init__(
+        self,
+        metrics_collector_registry: CollectorRegistry,
+    ) -> None:
+        """
+        Args:
+            metrics_collector_registry: The registry that each counter created
+                here is registered with.
+        """
+
+        def make_counter(metric_name: str) -> Counter:
+            # The counters differ only by name: they share the same (empty)
+            # documentation string, label set and registry.
+            return Counter(
+                metric_name,
+                "",
+                ["block_name"],
+                registry=metrics_collector_registry,
+            )
+
+        self.block_counter = make_counter("synapse_util_metrics_block_count")
+
+        self.block_timer = make_counter("synapse_util_metrics_block_time_seconds")
+
+        self.block_ru_utime = make_counter(
+            "synapse_util_metrics_block_ru_utime_seconds"
+        )
+
+        self.block_ru_stime = make_counter(
+            "synapse_util_metrics_block_ru_stime_seconds"
+        )
+
+        self.block_db_txn_count = make_counter(
+            "synapse_util_metrics_block_db_txn_count"
+        )
+
+        self.block_db_txn_duration = make_counter(
+            "synapse_util_metrics_block_db_txn_duration_seconds"
+        )
+        """seconds spent waiting for db txns, excluding scheduling time, in this block"""
+
+        self.block_db_sched_duration = make_counter(
+            "synapse_util_metrics_block_db_sched_duration_seconds"
+        )
+        """seconds spent waiting for a db connection, in this block"""
+
+
+class HomeserverMetricsManager:
+    """
+    Homeserver-scoped metrics manager.
+
+    This class serves as a container for the homeserver's global metrics objects.
+    """
+
+    def __init__(self) -> None:
+        # A registry created for this manager, rather than a shared one, so the
+        # metrics below are registered per manager instance.
+        self.metrics_collector_registry = CollectorRegistry(auto_describe=True)
+
+        self.block_metrics = BlockMetrics(
+            metrics_collector_registry=self.metrics_collector_registry,
+        )
diff --git a/synapse/server.py b/synapse/server.py
index fd16abb9ea..6b5302fa02 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -129,6 +129,7 @@ from synapse.http.matrixfederationclient import MatrixFederationHttpClient
 from synapse.media.media_repository import MediaRepository
 from synapse.metrics import register_threadpool
 from synapse.metrics.common_usage_metrics import CommonUsageMetricsManager
+from synapse.metrics.homeserver_metrics_manager import HomeserverMetricsManager
 from synapse.module_api import ModuleApi
 from synapse.module_api.callbacks import ModuleApiCallbacks
 from synapse.notifier import Notifier, ReplicationNotifier
@@ -310,6 +311,8 @@ class HomeServer(metaclass=abc.ABCMeta):
         # This attribute is set by the free function `refresh_certificate`.
         self.tls_server_context_factory: Optional[IOpenSSLContextFactory] = None
 
+        self.metrics_manager = HomeserverMetricsManager()
+
     def register_module_web_resource(self, path: str, resource: Resource) -> None:
         """Allows a module to register a web resource to be served at the given path.
 
diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py
index 6a389f7a7e..98900df2ab 100644
--- a/synapse/util/metrics.py
+++ b/synapse/util/metrics.py
@@ -33,7 +33,7 @@ from typing import (
     TypeVar,
 )
 
-from prometheus_client import CollectorRegistry, Counter, Metric
+from prometheus_client import CollectorRegistry, Metric
 from typing_extensions import Concatenate, ParamSpec
 
 from synapse.logging.context import (
@@ -42,36 +42,11 @@ from synapse.logging.context import (
     current_context,
 )
 from synapse.metrics import InFlightGauge
+from synapse.metrics.homeserver_metrics_manager import HomeserverMetricsManager
 from synapse.util import Clock
 
 logger = logging.getLogger(__name__)
 
-block_counter = Counter("synapse_util_metrics_block_count", "", ["block_name"])
-
-block_timer = Counter("synapse_util_metrics_block_time_seconds", "", ["block_name"])
-
-block_ru_utime = Counter(
-    "synapse_util_metrics_block_ru_utime_seconds", "", ["block_name"]
-)
-
-block_ru_stime = Counter(
-    "synapse_util_metrics_block_ru_stime_seconds", "", ["block_name"]
-)
-
-block_db_txn_count = Counter(
-    "synapse_util_metrics_block_db_txn_count", "", ["block_name"]
-)
-
-# seconds spent waiting for db txns, excluding scheduling time, in this block
-block_db_txn_duration = Counter(
-    "synapse_util_metrics_block_db_txn_duration_seconds", "", ["block_name"]
-)
-
-# seconds spent waiting for a db connection, in this block
-block_db_sched_duration = Counter(
-    "synapse_util_metrics_block_db_sched_duration_seconds", "", ["block_name"]
-)
-
 
 # This is dynamically created in InFlightGauge.__init__.
 class _InFlightMetric(Protocol):
@@ -141,12 +116,15 @@ def measure_func(
 class Measure:
     __slots__ = [
         "clock",
+        "metrics_manager",
         "name",
         "_logging_context",
         "start",
     ]
 
-    def __init__(self, clock: Clock, name: str) -> None:
+    def __init__(
+        self, clock: Clock, name: str, metrics_manager: HomeserverMetricsManager
+    ) -> None:
         """
         Args:
             clock: An object with a "time()" method, which returns the current
@@ -154,6 +132,9 @@ class Measure:
             name: The name of the metric to report.
         """
         self.clock = clock
+        # Container whose `block_metrics` counters are incremented once the
+        # logging context for this block has been exited.
+        self.metrics_manager = metrics_manager
         self.name = name
         curr_context = current_context()
         if not curr_context:
@@ -198,13 +179,18 @@ class Measure:
         self._logging_context.__exit__(exc_type, exc_val, exc_tb)
 
         try:
-            block_counter.labels(self.name).inc()
-            block_timer.labels(self.name).inc(duration)
-            block_ru_utime.labels(self.name).inc(usage.ru_utime)
-            block_ru_stime.labels(self.name).inc(usage.ru_stime)
-            block_db_txn_count.labels(self.name).inc(usage.db_txn_count)
-            block_db_txn_duration.labels(self.name).inc(usage.db_txn_duration_sec)
-            block_db_sched_duration.labels(self.name).inc(usage.db_sched_duration_sec)
+            block_metrics = self.metrics_manager.block_metrics
+            block_metrics.block_counter.labels(self.name).inc()
+            block_metrics.block_timer.labels(self.name).inc(duration)
+            block_metrics.block_ru_utime.labels(self.name).inc(usage.ru_utime)
+            block_metrics.block_ru_stime.labels(self.name).inc(usage.ru_stime)
+            block_metrics.block_db_txn_count.labels(self.name).inc(usage.db_txn_count)
+            block_metrics.block_db_txn_duration.labels(self.name).inc(
+                usage.db_txn_duration_sec
+            )
+            block_metrics.block_db_sched_duration.labels(self.name).inc(
+                usage.db_sched_duration_sec
+            )
         except ValueError:
             logger.warning("Failed to save metrics! Usage: %s", usage)
 