1
0

Refactor block metrics to use a homeserver-specific registry

Fix `HomeserverMetricsManager` lints

Fix `metrics_manager` missing in `Measure` `__slots__`
This commit is contained in:
Eric Eastwood
2025-06-23 15:09:47 -05:00
parent 6fabf82f4f
commit 676a2cc774
3 changed files with 116 additions and 35 deletions
@@ -0,0 +1,87 @@
#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright (C) 2025 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
from prometheus_client import CollectorRegistry, Counter
class BlockMetrics:
    """Bundle of Prometheus counters for per-block measurements.

    Each counter is created with a single ``block_name`` label and is
    registered on the collector registry handed to the constructor.
    """

    def __init__(
        self,
        metrics_collector_registry: CollectorRegistry,
    ) -> None:
        """
        Args:
            metrics_collector_registry: The registry every counter in this
                bundle is registered with.
        """

        def _new_block_counter(metric_name: str) -> Counter:
            # Every counter here shares the same empty help text, the same
            # label set and the same registry; only the metric name varies.
            return Counter(
                metric_name,
                "",
                ["block_name"],
                registry=metrics_collector_registry,
            )

        self.block_counter = _new_block_counter("synapse_util_metrics_block_count")
        self.block_timer = _new_block_counter(
            "synapse_util_metrics_block_time_seconds"
        )
        self.block_ru_utime = _new_block_counter(
            "synapse_util_metrics_block_ru_utime_seconds"
        )
        self.block_ru_stime = _new_block_counter(
            "synapse_util_metrics_block_ru_stime_seconds"
        )
        self.block_db_txn_count = _new_block_counter(
            "synapse_util_metrics_block_db_txn_count"
        )

        # seconds spent waiting for db txns, excluding scheduling time, in this block
        self.block_db_txn_duration = _new_block_counter(
            "synapse_util_metrics_block_db_txn_duration_seconds"
        )

        # seconds spent waiting for a db connection, in this block
        self.block_db_sched_duration = _new_block_counter(
            "synapse_util_metrics_block_db_sched_duration_seconds"
        )
class HomeserverMetricsManager:
    """
    Container for the metrics objects that belong to one homeserver.

    On construction it creates its own `CollectorRegistry` and a
    `BlockMetrics` instance whose counters are registered on that registry.
    """

    def __init__(self) -> None:
        registry = CollectorRegistry(auto_describe=True)

        # Keep the registry reachable for callers, and share the very same
        # object with the block metrics so they register on it.
        self.metrics_collector_registry = registry
        self.block_metrics = BlockMetrics(metrics_collector_registry=registry)
+3
View File
@@ -129,6 +129,7 @@ from synapse.http.matrixfederationclient import MatrixFederationHttpClient
from synapse.media.media_repository import MediaRepository
from synapse.metrics import register_threadpool
from synapse.metrics.common_usage_metrics import CommonUsageMetricsManager
from synapse.metrics.homeserver_metrics_manager import HomeserverMetricsManager
from synapse.module_api import ModuleApi
from synapse.module_api.callbacks import ModuleApiCallbacks
from synapse.notifier import Notifier, ReplicationNotifier
@@ -310,6 +311,8 @@ class HomeServer(metaclass=abc.ABCMeta):
# This attribute is set by the free function `refresh_certificate`.
self.tls_server_context_factory: Optional[IOpenSSLContextFactory] = None
self.metrics_manager = HomeserverMetricsManager()
def register_module_web_resource(self, path: str, resource: Resource) -> None:
"""Allows a module to register a web resource to be served at the given path.
+26 -35
View File
@@ -33,7 +33,7 @@ from typing import (
TypeVar,
)
from prometheus_client import CollectorRegistry, Counter, Metric
from prometheus_client import CollectorRegistry, Metric
from typing_extensions import Concatenate, ParamSpec
from synapse.logging.context import (
@@ -42,36 +42,11 @@ from synapse.logging.context import (
current_context,
)
from synapse.metrics import InFlightGauge
from synapse.metrics.homeserver_metrics_manager import HomeserverMetricsManager
from synapse.util import Clock
logger = logging.getLogger(__name__)
block_counter = Counter("synapse_util_metrics_block_count", "", ["block_name"])
block_timer = Counter("synapse_util_metrics_block_time_seconds", "", ["block_name"])
block_ru_utime = Counter(
"synapse_util_metrics_block_ru_utime_seconds", "", ["block_name"]
)
block_ru_stime = Counter(
"synapse_util_metrics_block_ru_stime_seconds", "", ["block_name"]
)
block_db_txn_count = Counter(
"synapse_util_metrics_block_db_txn_count", "", ["block_name"]
)
# seconds spent waiting for db txns, excluding scheduling time, in this block
block_db_txn_duration = Counter(
"synapse_util_metrics_block_db_txn_duration_seconds", "", ["block_name"]
)
# seconds spent waiting for a db connection, in this block
block_db_sched_duration = Counter(
"synapse_util_metrics_block_db_sched_duration_seconds", "", ["block_name"]
)
# This is dynamically created in InFlightGauge.__init__.
class _InFlightMetric(Protocol):
@@ -141,12 +116,15 @@ def measure_func(
class Measure:
__slots__ = [
"clock",
"metrics_manager",
"name",
"_logging_context",
"start",
]
def __init__(self, clock: Clock, name: str) -> None:
def __init__(
self, clock: Clock, name: str, metrics_manager: HomeserverMetricsManager
) -> None:
"""
Args:
clock: An object with a "time()" method, which returns the current
@@ -154,6 +132,7 @@ class Measure:
name: The name of the metric to report.
"""
self.clock = clock
self.metrics_manager = metrics_manager
self.name = name
curr_context = current_context()
if not curr_context:
@@ -198,13 +177,25 @@ class Measure:
self._logging_context.__exit__(exc_type, exc_val, exc_tb)
try:
block_counter.labels(self.name).inc()
block_timer.labels(self.name).inc(duration)
block_ru_utime.labels(self.name).inc(usage.ru_utime)
block_ru_stime.labels(self.name).inc(usage.ru_stime)
block_db_txn_count.labels(self.name).inc(usage.db_txn_count)
block_db_txn_duration.labels(self.name).inc(usage.db_txn_duration_sec)
block_db_sched_duration.labels(self.name).inc(usage.db_sched_duration_sec)
self.metrics_manager.block_metrics.block_counter.labels(self.name).inc()
self.metrics_manager.block_metrics.block_timer.labels(self.name).inc(
duration
)
self.metrics_manager.block_metrics.block_ru_utime.labels(self.name).inc(
usage.ru_utime
)
self.metrics_manager.block_metrics.block_ru_stime.labels(self.name).inc(
usage.ru_stime
)
self.metrics_manager.block_metrics.block_db_txn_count.labels(self.name).inc(
usage.db_txn_count
)
self.metrics_manager.block_metrics.block_db_txn_duration.labels(
self.name
).inc(usage.db_txn_duration_sec)
self.metrics_manager.block_metrics.block_db_sched_duration.labels(
self.name
).inc(usage.db_sched_duration_sec)
except ValueError:
logger.warning("Failed to save metrics! Usage: %s", usage)