
Compare commits

..

10 Commits

Author SHA1 Message Date
Brendan Abolivier
d1473f7362 Use link to advisory rather than to the CVE repo 2021-05-11 14:09:46 +01:00
Brendan Abolivier
86fb71431c 1.33.2 2021-05-11 14:01:32 +01:00
Richard van der Hoff
03318a766c Merge pull request from GHSA-x345-32rc-8h85
* tests for push rule pattern matching

* tests for acl pattern matching

* factor out common `re.escape`

* Factor out common re.compile

* Factor out common anchoring code

* add word_boundary support to `glob_to_regex`

* Use `glob_to_regex` in push rule evaluator

NB that this drops support for character classes. I don't think anyone ever
used them.

* Improve efficiency of globs with multiple wildcards

The idea here is that we compress multiple `*` globs into a single `.*`. We
also need to consider `?`, since `*?*` is as hard to implement efficiently as
`**`. (A sketch contrasting the old and new translations follows the commit
list below.)

* add assertion on regex pattern

* Fix mypy

* Simplify glob_to_regex

* Inline the glob_to_regex helper function

Signed-off-by: Dan Callahan <danc@element.io>

* Moar comments

Signed-off-by: Dan Callahan <danc@element.io>

Co-authored-by: Dan Callahan <danc@element.io>
2021-05-11 11:47:23 +02:00
Erik Johnston
4df26abf28 Unpin attrs dep after new version has been released (#9946)
c.f. #9936
2021-05-07 12:57:21 +01:00
Erik Johnston
ac88aca7f7 1.33.1 2021-05-06 14:06:38 +01:00
Erik Johnston
24f07a83e6 Pin attrs to <21.1.0 (#9937)
Fixes #9936
2021-05-06 14:06:06 +01:00
Brendan Abolivier
0644ac0989 1.33.0 2021-05-05 14:15:54 +01:00
Dan Callahan
56c4b47df3 Build Debian packages for Ubuntu 21.04 Hirsute (#9909)
Signed-off-by: Dan Callahan <danc@element.io>
2021-04-30 15:36:05 +01:00
Andrew Morgan
d11f2dfee5 typo in changelog 2021-04-29 14:31:14 +01:00
Andrew Morgan
e9444cc74d 1.33.0rc2 2021-04-29 11:45:37 +01:00
19 changed files with 454 additions and 236 deletions
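As context for the GHSA-x345-32rc-8h85 commit above: a minimal sketch, in plain Python, contrasting the one-wildcard-at-a-time translation that was removed with the run compression that replaced it. This is an illustration of the idea only, not the code as merged; the real implementations appear in the file diffs below.

```python
import re

_WILDCARD_RUN = re.compile(r"([\?\*]+)")

def old_style_translation(glob: str) -> str:
    # Sketch of the removed _glob_to_re translation (anchoring and
    # character-class handling omitted): "*" becomes a lazy ".*?" and
    # "?" becomes ".", so a run like "*?*" yields stacked quantifiers.
    r = re.escape(glob)
    r = r.replace(r"\*", ".*?")
    r = r.replace(r"\?", ".")
    return r

def new_style_translation(glob: str) -> str:
    # Sketch of the run compression: each run of wildcards collapses to a
    # single counted quantifier. n "?"s plus at least one "*" means "n or
    # more characters"; n "?"s alone means "exactly n characters".
    chunks = []
    for chunk in _WILDCARD_RUN.split(glob):
        if not _WILDCARD_RUN.match(chunk):
            chunks.append(re.escape(chunk))
            continue
        qmarks = chunk.count("?")
        chunks.append((".{%d,}" if "*" in chunk else ".{%d}") % qmarks)
    return "".join(chunks)

print(old_style_translation("*?*"))  # .*?..*?  -- nested lazy quantifiers
print(new_style_translation("*?*"))  # .{1,}    -- one bounded quantifier
```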

View File

@@ -1,3 +1,46 @@
Synapse 1.33.2 (2021-05-11)
===========================

Due to the security issue highlighted below, server administrators are encouraged to update Synapse. We are not aware of this vulnerability being exploited in the wild.

Security advisory
-----------------

This release fixes a denial of service attack ([CVE-2021-29471](https://github.com/matrix-org/synapse/security/advisories/GHSA-x345-32rc-8h85)) against Synapse's push rules implementation. Server admins are encouraged to upgrade.

Internal Changes
----------------

- Unpin attrs dependency. ([\#9946](https://github.com/matrix-org/synapse/issues/9946))


Synapse 1.33.1 (2021-05-06)
===========================

Bugfixes
--------

- Fix bug where `/sync` would break if using the latest version of `attrs` dependency, by pinning to a previous version. ([\#9937](https://github.com/matrix-org/synapse/issues/9937))


Synapse 1.33.0 (2021-05-05)
===========================

Features
--------

- Build Debian packages for Ubuntu 21.04 (Hirsute Hippo). ([\#9909](https://github.com/matrix-org/synapse/issues/9909))


Synapse 1.33.0rc2 (2021-04-29)
==============================

Bugfixes
--------

- Fix tight loop when handling presence replication when using workers. Introduced in v1.33.0rc1. ([\#9900](https://github.com/matrix-org/synapse/issues/9900))


Synapse 1.33.0rc1 (2021-04-28)
==============================

View File

@@ -1 +0,0 @@
Add type hints to presence handler.

View File

@@ -1 +0,0 @@
Reduce memory usage of the LRU caches.

View File

@@ -1 +0,0 @@
Fix a bug introduced in v1.32.0 where the associated connection was improperly logged for SQL logging statements.

View File

@@ -1 +0,0 @@
Fix tight loop handling presence replication when using workers. Introduced in v1.33.0rc1.

debian/changelog
View File

@@ -1,3 +1,21 @@
matrix-synapse-py3 (1.33.2) stable; urgency=medium

  * New synapse release 1.33.2.

 -- Synapse Packaging team <packages@matrix.org>  Tue, 11 May 2021 11:17:59 +0100

matrix-synapse-py3 (1.33.1) stable; urgency=medium

  * New synapse release 1.33.1.

 -- Synapse Packaging team <packages@matrix.org>  Thu, 06 May 2021 14:06:33 +0100

matrix-synapse-py3 (1.33.0) stable; urgency=medium

  * New synapse release 1.33.0.

 -- Synapse Packaging team <packages@matrix.org>  Wed, 05 May 2021 14:15:27 +0100

matrix-synapse-py3 (1.32.2) stable; urgency=medium

  * New synapse release 1.32.2.

View File

@@ -21,9 +21,10 @@ DISTS = (
"debian:buster",
"debian:bullseye",
"debian:sid",
"ubuntu:bionic",
"ubuntu:focal",
"ubuntu:groovy",
"ubuntu:bionic", # 18.04 LTS (our EOL forced by Py36 on 2021-12-23)
"ubuntu:focal", # 20.04 LTS (our EOL forced by Py38 on 2024-10-14)
"ubuntu:groovy", # 20.10 (EOL 2021-07-07)
"ubuntu:hirsute", # 21.04 (EOL 2022-01-05)
)
DESC = '''\

View File

@@ -47,7 +47,7 @@ try:
except ImportError:
pass
__version__ = "1.33.0rc1"
__version__ = "1.33.2"
if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)):
# We import here so that we don't have to install a bunch of deps when

View File

@@ -17,7 +17,7 @@ import os
import warnings
from datetime import datetime
from hashlib import sha256
from typing import List, Optional
from typing import List, Optional, Pattern
from unpaddedbase64 import encode_base64
@@ -124,7 +124,7 @@ class TlsConfig(Config):
fed_whitelist_entries = []
# Support globs (*) in whitelist values
self.federation_certificate_verification_whitelist = [] # type: List[str]
self.federation_certificate_verification_whitelist = [] # type: List[Pattern]
for entry in fed_whitelist_entries:
try:
entry_regex = glob_to_regex(entry.encode("ascii").decode("ascii"))
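For context, a sketch of what the whitelist compilation above produces for a typical entry, assuming the glob_to_regex semantics introduced later in this diff (`example.com` is a placeholder):

```python
import re

# glob_to_regex("*.example.com") compiles (case-insensitively) to:
pattern = re.compile(r"\A.{0,}\.example\.com\Z", re.IGNORECASE)

assert pattern.match("matrix.example.com")
assert not pattern.match("example.com")       # no label to supply the "."
assert not pattern.match("example.com.evil")  # anchored at the end by \Z
```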

View File

@@ -28,7 +28,6 @@ from bisect import bisect
from contextlib import contextmanager
from typing import (
TYPE_CHECKING,
Callable,
Collection,
Dict,
FrozenSet,
@@ -233,23 +232,23 @@ class BasePresenceHandler(abc.ABC):
"""
async def update_external_syncs_row(
self, process_id: str, user_id: str, is_syncing: bool, sync_time_msec: int
) -> None:
self, process_id, user_id, is_syncing, sync_time_msec
):
"""Update the syncing users for an external process as a delta.
This is a no-op when presence is handled by a different worker.
Args:
process_id: An identifier for the process the users are
process_id (str): An identifier for the process the users are
syncing against. This allows synapse to process updates
as users start and stop syncing against a given process.
user_id: The user who has started or stopped syncing
is_syncing: Whether or not the user is now syncing
sync_time_msec: Time in ms when the user was last syncing
user_id (str): The user who has started or stopped syncing
is_syncing (bool): Whether or not the user is now syncing
sync_time_msec (int): Time in ms when the user was last syncing
"""
pass
async def update_external_syncs_clear(self, process_id: str) -> None:
async def update_external_syncs_clear(self, process_id):
"""Marks all users that had been marked as syncing by a given process
as offline.
@@ -305,7 +304,7 @@ class _NullContextManager(ContextManager[None]):
class WorkerPresenceHandler(BasePresenceHandler):
def __init__(self, hs: "HomeServer"):
def __init__(self, hs):
super().__init__(hs)
self.hs = hs
@@ -328,7 +327,7 @@ class WorkerPresenceHandler(BasePresenceHandler):
# user_id -> last_sync_ms. Lists the users that have stopped syncing but
# we haven't notified the presence writer of that yet
self.users_going_offline = {} # type: Dict[str, int]
self.users_going_offline = {}
self._bump_active_client = ReplicationBumpPresenceActiveTime.make_client(hs)
self._set_state_client = ReplicationPresenceSetState.make_client(hs)
@@ -347,21 +346,24 @@ class WorkerPresenceHandler(BasePresenceHandler):
self._on_shutdown,
)
def _on_shutdown(self) -> None:
def _on_shutdown(self):
if self._presence_enabled:
self.hs.get_tcp_replication().send_command(
ClearUserSyncsCommand(self.instance_id)
)
def send_user_sync(self, user_id: str, is_syncing: bool, last_sync_ms: int) -> None:
def send_user_sync(self, user_id, is_syncing, last_sync_ms):
if self._presence_enabled:
self.hs.get_tcp_replication().send_user_sync(
self.instance_id, user_id, is_syncing, last_sync_ms
)
def mark_as_coming_online(self, user_id: str) -> None:
def mark_as_coming_online(self, user_id):
"""A user has started syncing. Send a UserSync to the presence writer,
unless they had recently stopped syncing.
Args:
user_id (str)
"""
going_offline = self.users_going_offline.pop(user_id, None)
if not going_offline:
@@ -369,15 +371,18 @@ class WorkerPresenceHandler(BasePresenceHandler):
# were offline
self.send_user_sync(user_id, True, self.clock.time_msec())
def mark_as_going_offline(self, user_id: str) -> None:
def mark_as_going_offline(self, user_id):
"""A user has stopped syncing. We wait before notifying the presence
writer as its likely they'll come back soon. This allows us to avoid
sending a stopped syncing immediately followed by a started syncing
notification to the presence writer
Args:
user_id (str)
"""
self.users_going_offline[user_id] = self.clock.time_msec()
def send_stop_syncing(self) -> None:
def send_stop_syncing(self):
"""Check if there are any users who have stopped syncing a while ago and
haven't come back yet. If there are poke the presence writer about them.
"""
@@ -425,9 +430,7 @@ class WorkerPresenceHandler(BasePresenceHandler):
return _user_syncing()
async def notify_from_replication(
self, states: List[UserPresenceState], stream_id: int
) -> None:
async def notify_from_replication(self, states, stream_id):
parties = await get_interested_parties(self.store, self.presence_router, states)
room_ids_to_states, users_to_states = parties
@@ -475,12 +478,7 @@ class WorkerPresenceHandler(BasePresenceHandler):
if count > 0
]
async def set_state(
self,
target_user: UserID,
state: JsonDict,
ignore_status_msg: bool = False,
) -> None:
async def set_state(self, target_user, state, ignore_status_msg=False):
"""Set the presence state of the user."""
presence = state["presence"]
@@ -510,7 +508,7 @@ class WorkerPresenceHandler(BasePresenceHandler):
ignore_status_msg=ignore_status_msg,
)
async def bump_presence_active_time(self, user: UserID) -> None:
async def bump_presence_active_time(self, user):
"""We've seen the user do something that indicates they're interacting
with the app.
"""
@@ -594,8 +592,8 @@ class PresenceHandler(BasePresenceHandler):
# we assume that all the sync requests on that process have stopped.
# Stored as a dict from process_id to set of user_id, and a dict of
# process_id to millisecond timestamp last updated.
self.external_process_to_current_syncs = {} # type: Dict[str, Set[str]]
self.external_process_last_updated_ms = {} # type: Dict[str, int]
self.external_process_to_current_syncs = {} # type: Dict[int, Set[str]]
self.external_process_last_updated_ms = {} # type: Dict[int, int]
self.external_sync_linearizer = Linearizer(name="external_sync_linearizer")
@@ -635,7 +633,7 @@ class PresenceHandler(BasePresenceHandler):
self._event_pos = self.store.get_current_events_token()
self._event_processing = False
async def _on_shutdown(self) -> None:
async def _on_shutdown(self):
"""Gets called when shutting down. This lets us persist any updates that
we haven't yet persisted, e.g. updates that only change some internal
timers. This allows changes to persist across startup without having to
@@ -664,7 +662,7 @@ class PresenceHandler(BasePresenceHandler):
)
logger.info("Finished _on_shutdown")
async def _persist_unpersisted_changes(self) -> None:
async def _persist_unpersisted_changes(self):
"""We periodically persist the unpersisted changes, as otherwise they
may stack up and slow down shutdown times.
"""
@@ -764,7 +762,7 @@ class PresenceHandler(BasePresenceHandler):
states, destinations
)
async def _handle_timeouts(self) -> None:
async def _handle_timeouts(self):
"""Checks the presence of users that have timed out and updates as
appropriate.
"""
@@ -816,7 +814,7 @@ class PresenceHandler(BasePresenceHandler):
return await self._update_states(changes)
async def bump_presence_active_time(self, user: UserID) -> None:
async def bump_presence_active_time(self, user):
"""We've seen the user do something that indicates they're interacting
with the app.
"""
@@ -913,17 +911,17 @@ class PresenceHandler(BasePresenceHandler):
return []
async def update_external_syncs_row(
self, process_id: str, user_id: str, is_syncing: bool, sync_time_msec: int
) -> None:
self, process_id, user_id, is_syncing, sync_time_msec
):
"""Update the syncing users for an external process as a delta.
Args:
process_id: An identifier for the process the users are
process_id (str): An identifier for the process the users are
syncing against. This allows synapse to process updates
as users start and stop syncing against a given process.
user_id: The user who has started or stopped syncing
is_syncing: Whether or not the user is now syncing
sync_time_msec: Time in ms when the user was last syncing
user_id (str): The user who has started or stopped syncing
is_syncing (bool): Whether or not the user is now syncing
sync_time_msec (int): Time in ms when the user was last syncing
"""
with (await self.external_sync_linearizer.queue(process_id)):
prev_state = await self.current_state_for_user(user_id)
@@ -960,7 +958,7 @@ class PresenceHandler(BasePresenceHandler):
self.external_process_last_updated_ms[process_id] = self.clock.time_msec()
async def update_external_syncs_clear(self, process_id: str) -> None:
async def update_external_syncs_clear(self, process_id):
"""Marks all users that had been marked as syncing by a given process
as offline.
@@ -981,12 +979,12 @@ class PresenceHandler(BasePresenceHandler):
)
self.external_process_last_updated_ms.pop(process_id, None)
async def current_state_for_user(self, user_id: str) -> UserPresenceState:
async def current_state_for_user(self, user_id):
"""Get the current presence state for a user."""
res = await self.current_state_for_users([user_id])
return res[user_id]
async def _persist_and_notify(self, states: List[UserPresenceState]) -> None:
async def _persist_and_notify(self, states):
"""Persist states in the database, poke the notifier and send to
interested remote servers
"""
@@ -1007,7 +1005,7 @@ class PresenceHandler(BasePresenceHandler):
# stream (which is updated by `store.update_presence`).
await self.maybe_send_presence_to_interested_destinations(states)
async def incoming_presence(self, origin: str, content: JsonDict) -> None:
async def incoming_presence(self, origin, content):
"""Called when we receive a `m.presence` EDU from a remote server."""
if not self._presence_enabled:
return
@@ -1057,9 +1055,7 @@ class PresenceHandler(BasePresenceHandler):
federation_presence_counter.inc(len(updates))
await self._update_states(updates)
async def set_state(
self, target_user: UserID, state: JsonDict, ignore_status_msg: bool = False
) -> None:
async def set_state(self, target_user, state, ignore_status_msg=False):
"""Set the presence state of the user."""
status_msg = state.get("status_msg", None)
presence = state["presence"]
@@ -1093,7 +1089,7 @@ class PresenceHandler(BasePresenceHandler):
await self._update_states([prev_state.copy_and_replace(**new_fields)])
async def is_visible(self, observed_user: UserID, observer_user: UserID) -> bool:
async def is_visible(self, observed_user, observer_user):
"""Returns whether a user can see another user's presence."""
observer_room_ids = await self.store.get_rooms_for_user(
observer_user.to_string()
@@ -1148,7 +1144,7 @@ class PresenceHandler(BasePresenceHandler):
)
return rows
def notify_new_event(self) -> None:
def notify_new_event(self):
"""Called when new events have happened. Handles users and servers
joining rooms and require being sent presence.
"""
@@ -1167,7 +1163,7 @@ class PresenceHandler(BasePresenceHandler):
run_as_background_process("presence.notify_new_event", _process_presence)
async def _unsafe_process(self) -> None:
async def _unsafe_process(self):
# Loop round handling deltas until we're up to date
while True:
with Measure(self.clock, "presence_delta"):
@@ -1192,7 +1188,7 @@ class PresenceHandler(BasePresenceHandler):
max_pos
)
async def _handle_state_delta(self, deltas: List[JsonDict]) -> None:
async def _handle_state_delta(self, deltas):
"""Process current state deltas to find new joins that need to be
handled.
"""
@@ -1315,7 +1311,7 @@ class PresenceHandler(BasePresenceHandler):
return [remote_host], states
def should_notify(old_state: UserPresenceState, new_state: UserPresenceState) -> bool:
def should_notify(old_state, new_state):
"""Decides if a presence state change should be sent to interested parties."""
if old_state == new_state:
return False
@@ -1351,9 +1347,7 @@ def should_notify(old_state: UserPresenceState, new_state: UserPresenceState) ->
return False
def format_user_presence_state(
state: UserPresenceState, now: int, include_user_id: bool = True
) -> JsonDict:
def format_user_presence_state(state, now, include_user_id=True):
"""Convert UserPresenceState to a format that can be sent down to clients
and to other servers.
@@ -1391,11 +1385,11 @@ class PresenceEventSource:
@log_function
async def get_new_events(
self,
user: UserID,
from_key: Optional[int],
room_ids: Optional[List[str]] = None,
include_offline: bool = True,
explicit_room_id: Optional[str] = None,
user,
from_key,
room_ids=None,
include_offline=True,
explicit_room_id=None,
**kwargs,
) -> Tuple[List[UserPresenceState], int]:
# The process for getting presence events is:
@@ -1600,7 +1594,7 @@ class PresenceEventSource:
if update.state != PresenceState.OFFLINE
]
def get_current_key(self) -> int:
def get_current_key(self):
return self.store.get_current_presence_token()
@cached(num_args=2, cache_context=True)
@@ -1660,20 +1654,15 @@ class PresenceEventSource:
return users_interested_in
def handle_timeouts(
user_states: List[UserPresenceState],
is_mine_fn: Callable[[str], bool],
syncing_user_ids: Set[str],
now: int,
) -> List[UserPresenceState]:
def handle_timeouts(user_states, is_mine_fn, syncing_user_ids, now):
"""Checks the presence of users that have timed out and updates as
appropriate.
Args:
user_states: List of UserPresenceState's to check.
is_mine_fn: Function that returns if a user_id is ours
syncing_user_ids: Set of user_ids with active syncs.
now: Current time in ms.
user_states(list): List of UserPresenceState's to check.
is_mine_fn (fn): Function that returns if a user_id is ours
syncing_user_ids (set): Set of user_ids with active syncs.
now (int): Current time in ms.
Returns:
List of UserPresenceState updates
@@ -1690,16 +1679,14 @@ def handle_timeouts(
return list(changes.values())
def handle_timeout(
state: UserPresenceState, is_mine: bool, syncing_user_ids: Set[str], now: int
) -> Optional[UserPresenceState]:
def handle_timeout(state, is_mine, syncing_user_ids, now):
"""Checks the presence of the user to see if any of the timers have elapsed
Args:
state
is_mine: Whether the user is ours
syncing_user_ids: Set of user_ids with active syncs.
now: Current time in ms.
state (UserPresenceState)
is_mine (bool): Whether the user is ours
syncing_user_ids (set): Set of user_ids with active syncs.
now (int): Current time in ms.
Returns:
A UserPresenceState update or None if no update.
@@ -1751,29 +1738,23 @@ def handle_timeout(
return state if changed else None
def handle_update(
prev_state: UserPresenceState,
new_state: UserPresenceState,
is_mine: bool,
wheel_timer: WheelTimer,
now: int,
) -> Tuple[UserPresenceState, bool, bool]:
def handle_update(prev_state, new_state, is_mine, wheel_timer, now):
"""Given a presence update:
1. Add any appropriate timers.
2. Check if we should notify anyone.
Args:
prev_state
new_state
is_mine: Whether the user is ours
wheel_timer
now: Time now in ms
prev_state (UserPresenceState)
new_state (UserPresenceState)
is_mine (bool): Whether the user is ours
wheel_timer (WheelTimer)
now (int): Time now in ms
Returns:
3-tuple: `(new_state, persist_and_notify, federation_ping)` where:
- new_state: is the state to actually persist
- persist_and_notify: whether to persist and notify people
- federation_ping: whether we should send a ping over federation
- persist_and_notify (bool): whether to persist and notify people
- federation_ping (bool): whether we should send a ping over federation
"""
user_id = new_state.user_id
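The mark_as_coming_online / mark_as_going_offline pair above amounts to a debounce on offline notifications: a user who stops syncing and promptly resumes never generates a stopped/started pair. A self-contained sketch of that pattern, with hypothetical names (this is not Synapse's API):

```python
import time
from typing import Dict

class SyncDebouncer:
    """Debounce 'stopped syncing' notifications by a grace period."""

    def __init__(self, delay_ms: int = 10_000) -> None:
        self.delay_ms = delay_ms
        self.going_offline: Dict[str, int] = {}  # user_id -> stop time (ms)

    def _now_ms(self) -> int:
        return int(time.monotonic() * 1000)

    def mark_coming_online(self, user_id: str) -> None:
        # If the user recently went offline, cancelling the pending
        # notification means we send nothing at all.
        if self.going_offline.pop(user_id, None) is None:
            print(f"notify writer: {user_id} started syncing")

    def mark_going_offline(self, user_id: str) -> None:
        # Don't notify yet; they may come straight back.
        self.going_offline[user_id] = self._now_ms()

    def flush(self) -> None:
        # Called periodically: notify only for users who stayed offline.
        now = self._now_ms()
        for user_id, stopped_at in list(self.going_offline.items()):
            if now - stopped_at > self.delay_ms:
                del self.going_offline[user_id]
                print(f"notify writer: {user_id} stopped syncing")
```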

View File

@@ -19,6 +19,7 @@ from typing import Any, Dict, List, Optional, Pattern, Tuple, Union
from synapse.events import EventBase
from synapse.types import UserID
from synapse.util import glob_to_regex, re_word_boundary
from synapse.util.caches.lrucache import LruCache
logger = logging.getLogger(__name__)
@@ -183,7 +184,7 @@ class PushRuleEvaluatorForEvent:
r = regex_cache.get((display_name, False, True), None)
if not r:
r1 = re.escape(display_name)
r1 = _re_word_boundary(r1)
r1 = re_word_boundary(r1)
r = re.compile(r1, flags=re.IGNORECASE)
regex_cache[(display_name, False, True)] = r
@@ -212,7 +213,7 @@ def _glob_matches(glob: str, value: str, word_boundary: bool = False) -> bool:
try:
r = regex_cache.get((glob, True, word_boundary), None)
if not r:
r = _glob_to_re(glob, word_boundary)
r = glob_to_regex(glob, word_boundary)
regex_cache[(glob, True, word_boundary)] = r
return bool(r.search(value))
except re.error:
@@ -220,56 +221,6 @@ def _glob_matches(glob: str, value: str, word_boundary: bool = False) -> bool:
return False
def _glob_to_re(glob: str, word_boundary: bool) -> Pattern:
"""Generates regex for a given glob.
Args:
glob
word_boundary: Whether to match against word boundaries or entire string.
"""
if IS_GLOB.search(glob):
r = re.escape(glob)
r = r.replace(r"\*", ".*?")
r = r.replace(r"\?", ".")
# handle [abc], [a-z] and [!a-z] style ranges.
r = GLOB_REGEX.sub(
lambda x: (
"[%s%s]" % (x.group(1) and "^" or "", x.group(2).replace(r"\\\-", "-"))
),
r,
)
if word_boundary:
r = _re_word_boundary(r)
return re.compile(r, flags=re.IGNORECASE)
else:
r = "^" + r + "$"
return re.compile(r, flags=re.IGNORECASE)
elif word_boundary:
r = re.escape(glob)
r = _re_word_boundary(r)
return re.compile(r, flags=re.IGNORECASE)
else:
r = "^" + re.escape(glob) + "$"
return re.compile(r, flags=re.IGNORECASE)
def _re_word_boundary(r: str) -> str:
"""
Adds word boundary characters to the start and end of an
expression to require that the match occur as a whole word,
but do so respecting the fact that strings starting or ending
with non-word characters will change word boundaries.
"""
# we can't use \b as it chokes on unicode. however \W seems to be okay
# as shorthand for [^0-9A-Za-z_].
return r"(^|\W)%s(\W|$)" % (r,)
def _flatten_dict(
d: Union[EventBase, dict],
prefix: Optional[List[str]] = None,
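The display-name matching above relies on re_word_boundary, which this change moves into synapse.util. A small sketch of how the wrapper behaves:

```python
import re

def re_word_boundary(r: str) -> str:
    # As in the diff above: \b chokes on some unicode, so (^|\W)...(\W|$)
    # is used as a word-boundary approximation instead.
    return r"(^|\W)%s(\W|$)" % (r,)

# Matching a display name only as a whole word:
pattern = re.compile(re_word_boundary(re.escape("foo")), flags=re.IGNORECASE)
assert pattern.search("hey Foo!")    # bounded by a space and punctuation
assert not pattern.search("foobar")  # embedded in a longer word
```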

View File

@@ -78,7 +78,8 @@ REQUIREMENTS = [
# we use attr.validators.deep_iterable, which arrived in 19.1.0 (Note:
# Fedora 31 only has 19.1, so if we want to upgrade we should wait until 33
# is out in November.)
"attrs>=19.1.0",
# Note: 21.1.0 broke `/sync`, see #9936
"attrs>=19.1.0,!=21.1.0",
"netaddr>=0.7.18",
"Jinja2>=2.9",
"bleach>=1.4.3",

View File

@@ -39,13 +39,6 @@ class WellKnownBuilder:
result = {"m.homeserver": {"base_url": self._config.public_baseurl}}
# Point to BigBlueButton widget for calls
result.update({
"io.element.call_behaviour": {
"widget_build_url": "http://localhost:8184/api/v1/dimension/bigbluebutton/widget_state",
}
})
if self._config.default_identity_server:
result["m.identity_server"] = {
"base_url": self._config.default_identity_server

View File

@@ -715,9 +715,7 @@ class DatabasePool:
# pool).
assert not self.engine.in_transaction(conn)
with LoggingContext(
str(curr_context), parent_context=parent_context
) as context:
with LoggingContext("runWithConnection", parent_context) as context:
sched_duration_sec = monotonic_time() - start_time
sql_scheduling_timer.observe(sched_duration_sec)
context.add_database_scheduled(sched_duration_sec)

View File

@@ -15,6 +15,7 @@
import json
import logging
import re
from typing import Pattern
import attr
from frozendict import frozendict
@@ -26,6 +27,9 @@ from synapse.logging import context
logger = logging.getLogger(__name__)
_WILDCARD_RUN = re.compile(r"([\?\*]+)")
def _reject_invalid_json(val):
"""Do not allow Infinity, -Infinity, or NaN values in JSON."""
raise ValueError("Invalid JSON value: '%s'" % val)
@@ -158,25 +162,54 @@ def log_failure(failure, msg, consumeErrors=True):
return failure
def glob_to_regex(glob):
def glob_to_regex(glob: str, word_boundary: bool = False) -> Pattern:
"""Converts a glob to a compiled regex object.
The regex is anchored at the beginning and end of the string.
Args:
glob (str)
glob: pattern to match
word_boundary: If True, the pattern will be allowed to match at word boundaries
anywhere in the string. Otherwise, the pattern is anchored at the start and
end of the string.
Returns:
re.RegexObject
compiled regex pattern
"""
res = ""
for c in glob:
if c == "*":
res = res + ".*"
elif c == "?":
res = res + "."
else:
res = res + re.escape(c)
# \A anchors at start of string, \Z at end of string
return re.compile(r"\A" + res + r"\Z", re.IGNORECASE)
# Patterns with wildcards must be simplified to avoid performance cliffs
# - The glob `?**?**?` is equivalent to the glob `???*`
# - The glob `???*` is equivalent to the regex `.{3,}`
chunks = []
for chunk in _WILDCARD_RUN.split(glob):
# No wildcards? re.escape()
if not _WILDCARD_RUN.match(chunk):
chunks.append(re.escape(chunk))
continue
# Wildcards? Simplify.
qmarks = chunk.count("?")
if "*" in chunk:
chunks.append(".{%d,}" % qmarks)
else:
chunks.append(".{%d}" % qmarks)
res = "".join(chunks)
if word_boundary:
res = re_word_boundary(res)
else:
# \A anchors at start of string, \Z at end of string
res = r"\A" + res + r"\Z"
return re.compile(res, re.IGNORECASE)
def re_word_boundary(r: str) -> str:
"""
Adds word boundary characters to the start and end of an
expression to require that the match occur as a whole word,
but do so respecting the fact that strings starting or ending
with non-word characters will change word boundaries.
"""
# we can't use \b as it chokes on unicode. however \W seems to be okay
# as shorthand for [^0-9A-Za-z_].
return r"(^|\W)%s(\W|$)" % (r,)

View File

@@ -17,10 +17,8 @@ from functools import wraps
from typing import (
Any,
Callable,
Collection,
Generic,
Iterable,
List,
Optional,
Type,
TypeVar,
@@ -59,56 +57,13 @@ class _Node:
__slots__ = ["prev_node", "next_node", "key", "value", "callbacks"]
def __init__(
self,
prev_node,
next_node,
key,
value,
callbacks: Collection[Callable[[], None]] = (),
self, prev_node, next_node, key, value, callbacks: Optional[set] = None
):
self.prev_node = prev_node
self.next_node = next_node
self.key = key
self.value = value
# Set of callbacks to run when the node gets deleted. We store as a list
# rather than a set to keep memory usage down (and since we expect few
# entries per node, the performance of checking for duplication in a
# list vs using a set is negligible).
#
# Note that we store this as an optional list to keep the memory
# footprint down. Storing `None` is free as its a singleton, while empty
# lists are 56 bytes (and empty sets are 216 bytes, if we did the naive
# thing and used sets).
self.callbacks = None # type: Optional[List[Callable[[], None]]]
self.add_callbacks(callbacks)
def add_callbacks(self, callbacks: Collection[Callable[[], None]]) -> None:
"""Add to stored list of callbacks, removing duplicates."""
if not callbacks:
return
if not self.callbacks:
self.callbacks = []
for callback in callbacks:
if callback not in self.callbacks:
self.callbacks.append(callback)
def run_and_clear_callbacks(self) -> None:
"""Run all callbacks and clear the stored list of callbacks. Used when
the node is being deleted.
"""
if not self.callbacks:
return
for callback in self.callbacks:
callback()
self.callbacks = None
self.callbacks = callbacks or set()
class LruCache(Generic[KT, VT]):
@@ -222,10 +177,10 @@ class LruCache(Generic[KT, VT]):
self.len = synchronized(cache_len)
def add_node(key, value, callbacks: Collection[Callable[[], None]] = ()):
def add_node(key, value, callbacks: Optional[set] = None):
prev_node = list_root
next_node = prev_node.next_node
node = _Node(prev_node, next_node, key, value, callbacks)
node = _Node(prev_node, next_node, key, value, callbacks or set())
prev_node.next_node = node
next_node.prev_node = node
cache[key] = node
@@ -256,15 +211,16 @@ class LruCache(Generic[KT, VT]):
deleted_len = size_callback(node.value)
cached_cache_len[0] -= deleted_len
node.run_and_clear_callbacks()
for cb in node.callbacks:
cb()
node.callbacks.clear()
return deleted_len
@overload
def cache_get(
key: KT,
default: Literal[None] = None,
callbacks: Collection[Callable[[], None]] = ...,
callbacks: Iterable[Callable[[], None]] = ...,
update_metrics: bool = ...,
) -> Optional[VT]:
...
@@ -273,7 +229,7 @@ class LruCache(Generic[KT, VT]):
def cache_get(
key: KT,
default: T,
callbacks: Collection[Callable[[], None]] = ...,
callbacks: Iterable[Callable[[], None]] = ...,
update_metrics: bool = ...,
) -> Union[T, VT]:
...
@@ -282,13 +238,13 @@ class LruCache(Generic[KT, VT]):
def cache_get(
key: KT,
default: Optional[T] = None,
callbacks: Collection[Callable[[], None]] = (),
callbacks: Iterable[Callable[[], None]] = (),
update_metrics: bool = True,
):
node = cache.get(key, None)
if node is not None:
move_node_to_front(node)
node.add_callbacks(callbacks)
node.callbacks.update(callbacks)
if update_metrics and metrics:
metrics.inc_hits()
return node.value
@@ -304,8 +260,10 @@ class LruCache(Generic[KT, VT]):
# We sometimes store large objects, e.g. dicts, which cause
# the inequality check to take a long time. So let's only do
# the check if we have some callbacks to call.
if value != node.value:
node.run_and_clear_callbacks()
if node.callbacks and value != node.value:
for cb in node.callbacks:
cb()
node.callbacks.clear()
# We don't bother to protect this by value != node.value as
# generally size_callback will be cheap compared with equality
@@ -315,7 +273,7 @@ class LruCache(Generic[KT, VT]):
cached_cache_len[0] -= size_callback(node.value)
cached_cache_len[0] += size_callback(value)
node.add_callbacks(callbacks)
node.callbacks.update(callbacks)
move_node_to_front(node)
node.value = value
@@ -368,7 +326,8 @@ class LruCache(Generic[KT, VT]):
list_root.next_node = list_root
list_root.prev_node = list_root
for node in cache.values():
node.run_and_clear_callbacks()
for cb in node.callbacks:
cb()
cache.clear()
if size_callback:
cached_cache_len[0] = 0
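The memory-usage comment in the removed _Node code above cites concrete container sizes; they are easy to check directly (figures are for a recent 64-bit CPython and vary across versions):

```python
import sys

print(sys.getsizeof([]))     # 56: an empty list
print(sys.getsizeof(set()))  # 216: an empty set
# None is a shared singleton, so a node whose callbacks slot holds None
# pays only for the reference it already stores -- no extra allocation.
```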

View File

@@ -74,6 +74,25 @@ class ServerACLsTestCase(unittest.TestCase):
self.assertFalse(server_matches_acl_event("[1:2::]", e))
self.assertTrue(server_matches_acl_event("1:2:3:4", e))
def test_wildcard_matching(self):
e = _create_acl_event({"allow": ["good*.com"]})
self.assertTrue(
server_matches_acl_event("good.com", e),
"* matches 0 characters",
)
self.assertTrue(
server_matches_acl_event("GOOD.COM", e),
"pattern is case-insensitive",
)
self.assertTrue(
server_matches_acl_event("good.aa.com", e),
"* matches several characters, including '.'",
)
self.assertFalse(
server_matches_acl_event("ishgood.com", e),
"pattern does not allow prefixes",
)
class StateQueryTests(unittest.FederatingHomeserverTestCase):

View File

@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any, Dict
from synapse.api.room_versions import RoomVersions
from synapse.events import FrozenEvent
from synapse.push import push_rule_evaluator
@@ -66,6 +68,170 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
# A display name with spaces should work fine.
self.assertTrue(evaluator.matches(condition, "@user:test", "foo bar"))
def _assert_matches(
self, condition: Dict[str, Any], content: Dict[str, Any], msg=None
) -> None:
evaluator = self._get_evaluator(content)
self.assertTrue(evaluator.matches(condition, "@user:test", "display_name"), msg)
def _assert_not_matches(
self, condition: Dict[str, Any], content: Dict[str, Any], msg=None
) -> None:
evaluator = self._get_evaluator(content)
self.assertFalse(
evaluator.matches(condition, "@user:test", "display_name"), msg
)
def test_event_match_body(self):
"""Check that event_match conditions on content.body work as expected"""
# if the key is `content.body`, the pattern matches substrings.
# non-wildcards should match
condition = {
"kind": "event_match",
"key": "content.body",
"pattern": "foobaz",
}
self._assert_matches(
condition,
{"body": "aaa FoobaZ zzz"},
"patterns should match and be case-insensitive",
)
self._assert_not_matches(
condition,
{"body": "aa xFoobaZ yy"},
"pattern should only match at word boundaries",
)
self._assert_not_matches(
condition,
{"body": "aa foobazx yy"},
"pattern should only match at word boundaries",
)
# wildcards should match
condition = {
"kind": "event_match",
"key": "content.body",
"pattern": "f?o*baz",
}
self._assert_matches(
condition,
{"body": "aaa FoobarbaZ zzz"},
"* should match string and pattern should be case-insensitive",
)
self._assert_matches(
condition, {"body": "aa foobaz yy"}, "* should match 0 characters"
)
self._assert_not_matches(
condition, {"body": "aa fobbaz yy"}, "? should not match 0 characters"
)
self._assert_not_matches(
condition, {"body": "aa fiiobaz yy"}, "? should not match 2 characters"
)
self._assert_not_matches(
condition,
{"body": "aa xfooxbaz yy"},
"pattern should only match at word boundaries",
)
self._assert_not_matches(
condition,
{"body": "aa fooxbazx yy"},
"pattern should only match at word boundaries",
)
# test backslashes
condition = {
"kind": "event_match",
"key": "content.body",
"pattern": r"f\oobaz",
}
self._assert_matches(
condition,
{"body": r"F\oobaz"},
"backslash should match itself",
)
condition = {
"kind": "event_match",
"key": "content.body",
"pattern": r"f\?obaz",
}
self._assert_matches(
condition,
{"body": r"F\oobaz"},
r"? after \ should match any character",
)
def test_event_match_non_body(self):
"""Check that event_match conditions on other keys work as expected"""
# if the key is anything other than 'content.body', the pattern must match the
# whole value.
# non-wildcards should match
condition = {
"kind": "event_match",
"key": "content.value",
"pattern": "foobaz",
}
self._assert_matches(
condition,
{"value": "FoobaZ"},
"patterns should match and be case-insensitive",
)
self._assert_not_matches(
condition,
{"value": "xFoobaZ"},
"pattern should only match at the start/end of the value",
)
self._assert_not_matches(
condition,
{"value": "FoobaZz"},
"pattern should only match at the start/end of the value",
)
# wildcards should match
condition = {
"kind": "event_match",
"key": "content.value",
"pattern": "f?o*baz",
}
self._assert_matches(
condition,
{"value": "FoobarbaZ"},
"* should match string and pattern should be case-insensitive",
)
self._assert_matches(
condition, {"value": "foobaz"}, "* should match 0 characters"
)
self._assert_not_matches(
condition, {"value": "fobbaz"}, "? should not match 0 characters"
)
self._assert_not_matches(
condition, {"value": "fiiobaz"}, "? should not match 2 characters"
)
self._assert_not_matches(
condition,
{"value": "xfooxbaz"},
"pattern should only match at the start/end of the value",
)
self._assert_not_matches(
condition,
{"value": "fooxbazx"},
"pattern should only match at the start/end of the value",
)
self._assert_not_matches(
condition,
{"value": "x\nfooxbaz"},
"pattern should not match after a newline",
)
self._assert_not_matches(
condition,
{"value": "fooxbaz\nx"},
"pattern should not match before a newline",
)
def test_no_body(self):
"""Not having a body shouldn't break the evaluator."""
evaluator = self._get_evaluator({})

View File

@@ -0,0 +1,59 @@
# Copyright 2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from synapse.util import glob_to_regex
from tests.unittest import TestCase
class GlobToRegexTestCase(TestCase):
def test_literal_match(self):
"""patterns without wildcards should match"""
pat = glob_to_regex("foobaz")
self.assertTrue(
pat.match("FoobaZ"), "patterns should match and be case-insensitive"
)
self.assertFalse(
pat.match("x foobaz"), "pattern should not match at word boundaries"
)
def test_wildcard_match(self):
pat = glob_to_regex("f?o*baz")
self.assertTrue(
pat.match("FoobarbaZ"),
"* should match string and pattern should be case-insensitive",
)
self.assertTrue(pat.match("foobaz"), "* should match 0 characters")
self.assertFalse(pat.match("fooxaz"), "the character after * must match")
self.assertFalse(pat.match("fobbaz"), "? should not match 0 characters")
self.assertFalse(pat.match("fiiobaz"), "? should not match 2 characters")
def test_multi_wildcard(self):
"""patterns with multiple wildcards in a row should match"""
pat = glob_to_regex("**baz")
self.assertTrue(pat.match("agsgsbaz"), "** should match any string")
self.assertTrue(pat.match("baz"), "** should match the empty string")
self.assertEqual(pat.pattern, r"\A.{0,}baz\Z")
pat = glob_to_regex("*?baz")
self.assertTrue(pat.match("agsgsbaz"), "*? should match any string")
self.assertTrue(pat.match("abaz"), "*? should match a single char")
self.assertFalse(pat.match("baz"), "*? should not match the empty string")
self.assertEqual(pat.pattern, r"\A.{1,}baz\Z")
pat = glob_to_regex("a?*?*?baz")
self.assertTrue(pat.match("a g baz"), "?*?*? should match 3 chars")
self.assertFalse(pat.match("a..baz"), "?*?*? should not match 2 chars")
self.assertTrue(pat.match("a.gg.baz"), "?*?*? should match 4 chars")
self.assertEqual(pat.pattern, r"\Aa.{3,}baz\Z")