From 3ba3c7fe7d3f338694e61b21b1fa3eaad7182350 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 12 Nov 2025 13:41:58 +0000 Subject: [PATCH] Reduce cardinality of metrics on event persister (#19133) This reduces the size of metrics by ~80%. Responding with the metrics takes significant amounts of time. --- changelog.d/19133.misc | 1 + contrib/grafana/synapse.json | 99 +----------------------- contrib/prometheus/synapse-v2.rules | 28 ------- synapse/storage/databases/main/events.py | 9 +-- 4 files changed, 6 insertions(+), 131 deletions(-) create mode 100644 changelog.d/19133.misc diff --git a/changelog.d/19133.misc b/changelog.d/19133.misc new file mode 100644 index 0000000000..122f2849b8 --- /dev/null +++ b/changelog.d/19133.misc @@ -0,0 +1 @@ +Reduce cardinality of `synapse_storage_events_persisted_events_sep_total` metric by removing `origin_entity` label. This also separates out events sent by local application services by changing the `origin_type` for such events to `application_service`. diff --git a/contrib/grafana/synapse.json b/contrib/grafana/synapse.json index e23afcf2d3..d2eb2dafae 100644 --- a/contrib/grafana/synapse.json +++ b/contrib/grafana/synapse.json @@ -2166,10 +2166,10 @@ "datasource": { "uid": "${DS_PROMETHEUS}" }, - "expr": "rate(synapse_storage_events_persisted_by_source_type{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", + "expr": "rate(synapse_storage_events_persisted_events_sep_total{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{type}}", + "legendFormat": "{{origin_type}}", "refId": "D" } ], @@ -2254,7 +2254,7 @@ "datasource": { "uid": "${DS_PROMETHEUS}" }, - "expr": "rate(synapse_storage_events_persisted_by_event_type{job=~\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size])", + "expr": "sum by(type) (rate(synapse_storage_events_persisted_events_sep_total{job=~\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size]))", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -2294,99 +2294,6 @@ "align": false } }, - { - "aliasColors": { - "irc-freenode (local)": "#EAB839" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "uid": "${DS_PROMETHEUS}" - }, - "decimals": 1, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 44 - }, - "hiddenSeries": false, - "id": 44, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "9.2.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "uid": "${DS_PROMETHEUS}" - }, - "expr": "rate(synapse_storage_events_persisted_by_origin{job=~\"$job\",index=~\"$index\",instance=\"$instance\"}[$bucket_size])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{origin_entity}} ({{origin_type}})", - "refId": "A", - "step": 20 - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Events/s by Origin", - "tooltip": { - "shared": false, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "hertz", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - }, { "aliasColors": {}, "bars": false, diff --git a/contrib/prometheus/synapse-v2.rules b/contrib/prometheus/synapse-v2.rules index dde311322f..3a5de8f528 100644 --- a/contrib/prometheus/synapse-v2.rules +++ b/contrib/prometheus/synapse-v2.rules @@ -44,31 +44,3 @@ groups: ### ### End of 'Prometheus Console Only' rules block ### - - - ### - ### Grafana Only - ### The following rules are only needed if you use the Grafana dashboard - ### in contrib/grafana/synapse.json - ### - - record: synapse_storage_events_persisted_by_source_type - expr: sum without(type, origin_type, origin_entity) (synapse_storage_events_persisted_events_sep_total{origin_type="remote"}) - labels: - type: remote - - record: synapse_storage_events_persisted_by_source_type - expr: sum without(type, origin_type, origin_entity) (synapse_storage_events_persisted_events_sep_total{origin_entity="*client*",origin_type="local"}) - labels: - type: local - - record: synapse_storage_events_persisted_by_source_type - expr: sum without(type, origin_type, origin_entity) (synapse_storage_events_persisted_events_sep_total{origin_entity!="*client*",origin_type="local"}) - labels: - type: bridges - - - record: synapse_storage_events_persisted_by_event_type - expr: sum without(origin_entity, origin_type) (synapse_storage_events_persisted_events_sep_total) - - - record: synapse_storage_events_persisted_by_origin - expr: sum without(type) (synapse_storage_events_persisted_events_sep_total) - ### - ### End of 'Grafana Only' rules block - ### diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 59112e647c..ae30a61cec 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -74,7 +74,6 @@ from synapse.types import ( MutableStateMap, StateMap, StrCollection, - get_domain_from_id, ) from synapse.types.handlers import SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES from synapse.types.state import StateFilter @@ -96,7 +95,7 @@ persist_event_counter = Counter( event_counter = Counter( "synapse_storage_events_persisted_events_sep", "", - labelnames=["type", "origin_type", "origin_entity", SERVER_NAME_LABEL], + labelnames=["type", "origin_type", SERVER_NAME_LABEL], ) # State event type/key pairs that we need to gather to fill in the @@ -374,19 +373,15 @@ class PersistEventsStore: for event, context in events_and_contexts: if context.app_service: - origin_type = "local" - origin_entity = context.app_service.id + origin_type = "application_service" elif self.hs.is_mine_id(event.sender): origin_type = "local" - origin_entity = "*client*" else: origin_type = "remote" - origin_entity = get_domain_from_id(event.sender) event_counter.labels( type=event.type, origin_type=origin_type, - origin_entity=origin_entity, **{SERVER_NAME_LABEL: self.server_name}, ).inc()