Add a unit test for the phone home stats (#18463)

This commit is contained in:
Andrew Morgan
2025-05-20 16:26:45 +01:00
committed by GitHub
parent 9d43bec326
commit 4b1d9d5d0e
4 changed files with 296 additions and 7 deletions

1
changelog.d/18463.misc Normal file
View File

@@ -0,0 +1 @@
Add unit tests for homeserver usage statistics.

View File

@@ -30,7 +30,7 @@ The following statistics are sent to the configured reporting endpoint:
| `python_version` | string | The Python version number in use (e.g. "3.7.1"). Taken from `sys.version_info`. |
| `total_users` | int | The number of registered users on the homeserver. |
| `total_nonbridged_users` | int | The number of users, excluding those created by an Application Service. |
| `daily_user_type_native` | int | The number of native users created in the last 24 hours. |
| `daily_user_type_native` | int | The number of native, non-guest users created in the last 24 hours. |
| `daily_user_type_guest` | int | The number of guest users created in the last 24 hours. |
| `daily_user_type_bridged` | int | The number of users created by Application Services in the last 24 hours. |
| `total_room_count` | int | The total number of rooms present on the homeserver. |
@@ -50,8 +50,8 @@ The following statistics are sent to the configured reporting endpoint:
| `cache_factor` | int | The configured [`global factor`](../../configuration/config_documentation.md#caching) value for caching. |
| `event_cache_size` | int | The configured [`event_cache_size`](../../configuration/config_documentation.md#caching) value for caching. |
| `database_engine` | string | The database engine that is in use. Either "psycopg2" meaning PostgreSQL is in use, or "sqlite3" for SQLite3. |
| `database_server_version` | string | The version of the database server. Examples being "10.10" for PostgreSQL server version 10.10, and "3.38.5" for SQLite 3.38.5 installed on the system. |
| `log_level` | string | The log level in use. Examples are "INFO", "WARNING", "ERROR", "DEBUG", etc. |
| `database_server_version` | string | The version of the database server. Examples being "10.10" for PostgreSQL server version 10.10, and "3.38.5" for SQLite 3.38.5 installed on the system. |
| `log_level` | string | The log level in use. Examples are "INFO", "WARNING", "ERROR", "DEBUG", etc. |
[^1]: Native matrix users and guests are always counted. If the

View File

@@ -34,6 +34,22 @@ if TYPE_CHECKING:
# Messages from this module are logged under the "synapse.app.homeserver" name.
logger = logging.getLogger("synapse.app.homeserver")
# Unit-conversion helpers, so that the intervals below are not spelled out as
# bare products such as `3 * 60 * 60 * 1000`.
ONE_MINUTE_SECONDS = 60
ONE_HOUR_SECONDS = 60 * ONE_MINUTE_SECONDS
# Multiply a duration in seconds by this to get the millisecond value that is
# passed to `clock.looping_call`.
MILLISECONDS_PER_SECOND = 1000
INITIAL_DELAY_BEFORE_FIRST_PHONE_HOME_SECONDS = 5 * ONE_MINUTE_SECONDS
"""
We wait 5 minutes to send the first set of stats as the server can be quite busy the
first few minutes
"""
PHONE_HOME_INTERVAL_SECONDS = 3 * ONE_HOUR_SECONDS
"""
Phone home stats are sent every 3 hours
"""
# Contains the list of processes we will be monitoring
# currently either 0 or 1
_stats_process: List[Tuple[int, "resource.struct_rusage"]] = []
@@ -185,12 +201,14 @@ def start_phone_stats_home(hs: "HomeServer") -> None:
# If you increase the loop period, the accuracy of user_daily_visits
# table will decrease
clock.looping_call(
hs.get_datastores().main.generate_user_daily_visits, 5 * 60 * 1000
hs.get_datastores().main.generate_user_daily_visits,
5 * ONE_MINUTE_SECONDS * MILLISECONDS_PER_SECOND,
)
# monthly active user limiting functionality
clock.looping_call(
hs.get_datastores().main.reap_monthly_active_users, 1000 * 60 * 60
hs.get_datastores().main.reap_monthly_active_users,
ONE_HOUR_SECONDS * MILLISECONDS_PER_SECOND,
)
hs.get_datastores().main.reap_monthly_active_users()
@@ -221,7 +239,12 @@ def start_phone_stats_home(hs: "HomeServer") -> None:
if hs.config.metrics.report_stats:
logger.info("Scheduling stats reporting for 3 hour intervals")
clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000, hs, stats)
clock.looping_call(
phone_stats_home,
PHONE_HOME_INTERVAL_SECONDS * MILLISECONDS_PER_SECOND,
hs,
stats,
)
# We need to defer this init for the cases that we daemonize
# otherwise the process ID we get is that of the non-daemon process
@@ -229,4 +252,6 @@ def start_phone_stats_home(hs: "HomeServer") -> None:
# We wait 5 minutes to send the first set of stats as the server can
# be quite busy the first few minutes
clock.call_later(5 * 60, phone_stats_home, hs, stats)
clock.call_later(
INITIAL_DELAY_BEFORE_FIRST_PHONE_HOME_SECONDS, phone_stats_home, hs, stats
)

View File

@@ -0,0 +1,263 @@
#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright (C) 2025 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
import logging
from unittest.mock import AsyncMock
from twisted.test.proto_helpers import MemoryReactor
from synapse.app.phone_stats_home import (
PHONE_HOME_INTERVAL_SECONDS,
start_phone_stats_home,
)
from synapse.rest import admin, login, register, room
from synapse.server import HomeServer
from synapse.types import JsonDict
from synapse.util import Clock
from tests import unittest
from tests.server import ThreadedMemoryReactorClock
# URL configured as the homeserver's `report_stats_endpoint`; the test picks out
# the mocked `put_json` calls whose first argument equals this value.
TEST_REPORT_STATS_ENDPOINT = "https://fake.endpoint/stats"
# Value configured as `server_context`, which the test expects to find echoed
# back in the reported stats.
TEST_SERVER_CONTEXT = "test-server-context"
class PhoneHomeStatsTestCase(unittest.HomeserverTestCase):
    """
    Tests for the usage statistics that the homeserver reports ("phones home").

    The homeserver is configured with stats reporting enabled and its proxied
    HTTP client's `put_json` is replaced with a mock, so the payload sent to the
    configured reporting endpoint can be inspected.
    """

    servlets = [
        admin.register_servlets_for_client_rest_resource,
        room.register_servlets,
        register.register_servlets,
        login.register_servlets,
    ]

    def make_homeserver(
        self, reactor: ThreadedMemoryReactorClock, clock: Clock
    ) -> HomeServer:
        """
        Build a homeserver with stats reporting enabled and a mocked HTTP client.

        Returns:
            The configured test homeserver.
        """
        # Configure the homeserver to enable stats reporting.
        config = self.default_config()
        config["report_stats"] = True
        config["report_stats_endpoint"] = TEST_REPORT_STATS_ENDPOINT

        # Configure the server context so we can check it ends up being reported
        config["server_context"] = TEST_SERVER_CONTEXT

        # Allow guests to be registered
        config["allow_guest_access"] = True

        hs = self.setup_test_homeserver(config=config)

        # Replace the proxied http client with a mock, so we can inspect outbound
        # requests to the configured stats endpoint.
        self.put_json_mock = AsyncMock(return_value={})
        hs.get_proxied_http_client().put_json = self.put_json_mock  # type: ignore[method-assign]
        return hs

    def prepare(
        self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
    ) -> None:
        """Finish background updates, then schedule the phone home stats calls."""
        self.store = homeserver.get_datastores().main

        # Wait for the background updates to add the database triggers that keep the
        # `event_stats` table up-to-date.
        self.wait_for_background_updates()

        # Force stats reporting to occur
        start_phone_stats_home(hs=homeserver)

        super().prepare(reactor, clock, homeserver)

    def _get_latest_phone_home_stats(self) -> JsonDict:
        """
        Advance time until stats have been reported and return the newest report.

        Returns:
            The stats payload (second positional argument) of the most recent
            mocked `put_json` call made to `TEST_REPORT_STATS_ENDPOINT`.
        """
        # Move the reactor forward by two full reporting intervals plus a healthy
        # margin (50s), so that `phone_stats_home` is guaranteed to have been
        # called at least once.
        self.reactor.advance(2 * PHONE_HOME_INTERVAL_SECONDS + 50)

        # Extract the calls made to the stats endpoint from our http client mock
        report_stats_calls = [
            call
            for call in self.put_json_mock.call_args_list
            if call.args[0] == TEST_REPORT_STATS_ENDPOINT
        ]

        self.assertGreaterEqual(
            len(report_stats_calls),
            1,
            "Expected at-least one call to the report_stats endpoint",
        )

        # `call_args_list` is in call order, so the last entry is the latest
        # report. (Previously the *first* report was returned, which would be
        # stale if this helper were used more than once in a test.)
        phone_home_stats = report_stats_calls[-1].args[1]

        return phone_home_stats

    def _perform_user_actions(self) -> None:
        """
        Perform some actions on the homeserver that would bump the phone home
        stats.

        This creates a few users (including a guest), a room, and sends some messages.

        Expected number of events:
         - 10 unencrypted messages
         - 5 encrypted messages
         - 24 total events (including room state, etc)
        """
        # Create some users
        user_1_mxid = self.register_user(
            username="test_user_1",
            password="test",
        )
        user_2_mxid = self.register_user(
            username="test_user_2",
            password="test",
        )

        # Note: `self.register_user` does not support guest registration, and updating the
        # Admin API it calls to add a new parameter would cause the `mac` parameter to fail
        # in a backwards-incompatible manner. Hence, we make a manual request here.
        guest_registration_channel = self.make_request(
            method="POST",
            path="/_matrix/client/v3/register?kind=guest",
            content={
                "username": "guest_user",
                "password": "test",
            },
            shorthand=False,
        )
        # Fail early, with the response attached, if the guest could not be
        # registered, rather than later on a mismatching guest count.
        self.assertEqual(
            guest_registration_channel.code, 200, guest_registration_channel.result
        )

        # Log in to each user
        user_1_token = self.login(username=user_1_mxid, password="test")
        user_2_token = self.login(username=user_2_mxid, password="test")

        # Create a room between the two users
        room_1_id = self.helper.create_room_as(
            is_public=False,
            tok=user_1_token,
        )

        # Mark this room as end-to-end encrypted
        self.helper.send_state(
            room_id=room_1_id,
            event_type="m.room.encryption",
            body={
                "algorithm": "m.megolm.v1.aes-sha2",
                "rotation_period_ms": 604800000,
                "rotation_period_msgs": 100,
            },
            state_key="",
            tok=user_1_token,
        )

        # User 1 invites user 2
        self.helper.invite(
            room=room_1_id,
            src=user_1_mxid,
            targ=user_2_mxid,
            tok=user_1_token,
        )

        # User 2 joins
        self.helper.join(
            room=room_1_id,
            user=user_2_mxid,
            tok=user_2_token,
        )

        # User 1 sends 10 unencrypted messages
        for _ in range(10):
            self.helper.send(
                room_id=room_1_id,
                body="Zoinks Scoob! A message!",
                tok=user_1_token,
            )

        # User 2 sends 5 encrypted "messages"
        for _ in range(5):
            self.helper.send_event(
                room_id=room_1_id,
                type="m.room.encrypted",
                content={
                    "algorithm": "m.olm.v1.curve25519-aes-sha2",
                    "sender_key": "some_key",
                    "ciphertext": {
                        "some_key": {
                            "type": 0,
                            "body": "encrypted_payload",
                        },
                    },
                },
                tok=user_2_token,
            )

    def test_phone_home_stats(self) -> None:
        """
        Test that the phone home stats contain the stats we expect based on
        the scenario carried out in `_perform_user_actions`.
        """
        # Do things to bump the stats
        self._perform_user_actions()

        # Wait for the stats to be reported
        phone_home_stats = self._get_latest_phone_home_stats()

        self.assertEqual(
            phone_home_stats["homeserver"], self.hs.config.server.server_name
        )

        self.assertIsInstance(phone_home_stats["memory_rss"], int)
        self.assertIsInstance(phone_home_stats["cpu_average"], int)

        self.assertEqual(phone_home_stats["server_context"], TEST_SERVER_CONTEXT)

        self.assertIsInstance(phone_home_stats["timestamp"], int)
        self.assertIsInstance(phone_home_stats["uptime_seconds"], int)
        self.assertIsInstance(phone_home_stats["python_version"], str)

        # We expect only our test users to exist on the homeserver
        self.assertEqual(phone_home_stats["total_users"], 3)
        self.assertEqual(phone_home_stats["total_nonbridged_users"], 3)
        self.assertEqual(phone_home_stats["daily_user_type_native"], 2)
        self.assertEqual(phone_home_stats["daily_user_type_guest"], 1)
        self.assertEqual(phone_home_stats["daily_user_type_bridged"], 0)
        self.assertEqual(phone_home_stats["total_room_count"], 1)
        self.assertEqual(phone_home_stats["daily_active_users"], 2)
        self.assertEqual(phone_home_stats["monthly_active_users"], 2)
        self.assertEqual(phone_home_stats["daily_active_rooms"], 1)
        self.assertEqual(phone_home_stats["daily_active_e2ee_rooms"], 1)
        self.assertEqual(phone_home_stats["daily_messages"], 10)
        self.assertEqual(phone_home_stats["daily_e2ee_messages"], 5)
        self.assertEqual(phone_home_stats["daily_sent_messages"], 10)
        self.assertEqual(phone_home_stats["daily_sent_e2ee_messages"], 5)

        # Our users have not been around for >30 days, hence these are all 0.
        self.assertEqual(phone_home_stats["r30v2_users_all"], 0)
        self.assertEqual(phone_home_stats["r30v2_users_android"], 0)
        self.assertEqual(phone_home_stats["r30v2_users_ios"], 0)
        self.assertEqual(phone_home_stats["r30v2_users_electron"], 0)
        self.assertEqual(phone_home_stats["r30v2_users_web"], 0)

        self.assertEqual(
            phone_home_stats["cache_factor"], self.hs.config.caches.global_factor
        )
        self.assertEqual(
            phone_home_stats["event_cache_size"],
            self.hs.config.caches.event_cache_size,
        )
        self.assertEqual(
            phone_home_stats["database_engine"],
            self.hs.config.database.databases[0].config["name"],
        )
        self.assertEqual(
            phone_home_stats["database_server_version"],
            self.hs.get_datastores().main.database_engine.server_version,
        )

        # The reported log level should be the effective level of the "synapse"
        # logger, by name.
        synapse_logger = logging.getLogger("synapse")
        log_level = synapse_logger.getEffectiveLevel()
        self.assertEqual(phone_home_stats["log_level"], logging.getLevelName(log_level))