Compare commits
13 Commits
quenting/l
...
mv/add-mxi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5d0352c207 | ||
|
|
2da21f6204 | ||
|
|
01c582ff36 | ||
|
|
42a392f4e2 | ||
|
|
830a29482a | ||
|
|
44f7df09bb | ||
|
|
25597e97f3 | ||
|
|
7ad75e6d20 | ||
|
|
7c224e149b | ||
|
|
1fe7be0be1 | ||
|
|
dc9957dbba | ||
|
|
09a7adf85d | ||
|
|
aa9e47e144 |
1
changelog.d/16060.misc
Normal file
1
changelog.d/16060.misc
Normal file
@@ -0,0 +1 @@
|
|||||||
|
Add logging of sender invalid mxids when persisting events and receiving EDUs.
|
||||||
@@ -1106,6 +1106,10 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
|
|||||||
)
|
)
|
||||||
prev_ids = [str(p) for p in prev_ids] # They may come as ints
|
prev_ids = [str(p) for p in prev_ids] # They may come as ints
|
||||||
|
|
||||||
|
# The result of `is_valid` is not used yet because for now we only want to
|
||||||
|
# log invalid mxids in the wild.
|
||||||
|
UserID.is_valid(user_id, allow_historical_mxids=True)
|
||||||
|
|
||||||
if get_domain_from_id(user_id) != origin:
|
if get_domain_from_id(user_id) != origin:
|
||||||
# TODO: Raise?
|
# TODO: Raise?
|
||||||
logger.warning(
|
logger.warning(
|
||||||
|
|||||||
@@ -109,6 +109,10 @@ class DeviceMessageHandler:
|
|||||||
origin,
|
origin,
|
||||||
sender_user_id,
|
sender_user_id,
|
||||||
)
|
)
|
||||||
|
# The result of `is_valid` is not used yet because for now we only want to
|
||||||
|
# log invalid mxids in the wild.
|
||||||
|
UserID.is_valid(sender_user_id, allow_historical_mxids=True)
|
||||||
|
|
||||||
message_type = content["type"]
|
message_type = content["type"]
|
||||||
message_id = content["message_id"]
|
message_id = content["message_id"]
|
||||||
for user_id, by_device in content["messages"].items():
|
for user_id, by_device in content["messages"].items():
|
||||||
|
|||||||
@@ -1593,6 +1593,10 @@ class SigningKeyEduUpdater:
|
|||||||
logger.warning("Got signing key update edu for %r from %r", user_id, origin)
|
logger.warning("Got signing key update edu for %r from %r", user_id, origin)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# The result of `is_valid` is not used yet because for now we only want to
|
||||||
|
# log invalid mxids in the wild.
|
||||||
|
UserID.is_valid(user_id, allow_historical_mxids=True)
|
||||||
|
|
||||||
room_ids = await self.store.get_rooms_for_user(user_id)
|
room_ids = await self.store.get_rooms_for_user(user_id)
|
||||||
if not room_ids:
|
if not room_ids:
|
||||||
# We don't share any rooms with this user. Ignore update, as we
|
# We don't share any rooms with this user. Ignore update, as we
|
||||||
|
|||||||
@@ -117,6 +117,10 @@ class ReceiptsHandler:
|
|||||||
max_batch_id: Optional[int] = None
|
max_batch_id: Optional[int] = None
|
||||||
|
|
||||||
for receipt in receipts:
|
for receipt in receipts:
|
||||||
|
# The result of `is_valid` is not used yet because for now we only want to
|
||||||
|
# log invalid mxids in the wild.
|
||||||
|
UserID.is_valid(receipt.user_id, allow_historical_mxids=True)
|
||||||
|
|
||||||
res = await self.store.insert_receipt(
|
res = await self.store.insert_receipt(
|
||||||
receipt.room_id,
|
receipt.room_id,
|
||||||
receipt.receipt_type,
|
receipt.receipt_type,
|
||||||
|
|||||||
@@ -370,6 +370,10 @@ class TypingWriterHandler(FollowerTypingHandler):
|
|||||||
room_id = content["room_id"]
|
room_id = content["room_id"]
|
||||||
user_id = content["user_id"]
|
user_id = content["user_id"]
|
||||||
|
|
||||||
|
# The result of `is_valid` is not used yet because for now we only want to
|
||||||
|
# log invalid mxids in the wild.
|
||||||
|
UserID.is_valid(user_id, allow_historical_mxids=True)
|
||||||
|
|
||||||
# If we're not in the room just ditch the event entirely. This is
|
# If we're not in the room just ditch the event entirely. This is
|
||||||
# probably an old server that has come back and thinks we're still in
|
# probably an old server that has come back and thinks we're still in
|
||||||
# the room (or we've been rejoined to the room by a state reset).
|
# the room (or we've been rejoined to the room by a state reset).
|
||||||
|
|||||||
@@ -63,6 +63,7 @@ from synapse.types import (
|
|||||||
PersistedEventPosition,
|
PersistedEventPosition,
|
||||||
RoomStreamToken,
|
RoomStreamToken,
|
||||||
StateMap,
|
StateMap,
|
||||||
|
UserID,
|
||||||
get_domain_from_id,
|
get_domain_from_id,
|
||||||
)
|
)
|
||||||
from synapse.types.state import StateFilter
|
from synapse.types.state import StateFilter
|
||||||
@@ -397,6 +398,10 @@ class EventsPersistenceStorageController:
|
|||||||
event_ids: List[str] = []
|
event_ids: List[str] = []
|
||||||
partitioned: Dict[str, List[Tuple[EventBase, EventContext]]] = {}
|
partitioned: Dict[str, List[Tuple[EventBase, EventContext]]] = {}
|
||||||
for event, ctx in events_and_contexts:
|
for event, ctx in events_and_contexts:
|
||||||
|
# The result of `is_valid` is not used yet because for now we only want to
|
||||||
|
# log invalid mxids in the wild.
|
||||||
|
UserID.is_valid(event.user_id, allow_historical_mxids=True)
|
||||||
|
|
||||||
partitioned.setdefault(event.room_id, []).append((event, ctx))
|
partitioned.setdefault(event.room_id, []).append((event, ctx))
|
||||||
event_ids.append(event.event_id)
|
event_ids.append(event.event_id)
|
||||||
|
|
||||||
|
|||||||
@@ -13,6 +13,7 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
import abc
|
import abc
|
||||||
|
import logging
|
||||||
import re
|
import re
|
||||||
import string
|
import string
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
@@ -64,6 +65,9 @@ if TYPE_CHECKING:
|
|||||||
from synapse.storage.databases.main import DataStore, PurgeEventsStore
|
from synapse.storage.databases.main import DataStore, PurgeEventsStore
|
||||||
from synapse.storage.databases.main.appservice import ApplicationServiceWorkerStore
|
from synapse.storage.databases.main.appservice import ApplicationServiceWorkerStore
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
# Define a state map type from type/state_key to T (usually an event ID or
|
# Define a state map type from type/state_key to T (usually an event ID or
|
||||||
# event)
|
# event)
|
||||||
T = TypeVar("T")
|
T = TypeVar("T")
|
||||||
@@ -306,7 +310,7 @@ class DomainSpecificString(metaclass=abc.ABCMeta):
|
|||||||
return "%s%s:%s" % (self.SIGIL, self.localpart, self.domain)
|
return "%s%s:%s" % (self.SIGIL, self.localpart, self.domain)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def is_valid(cls: Type[DS], s: str) -> bool:
|
def is_valid(cls: Type[DS], s: str, **kwargs: Any) -> bool:
|
||||||
"""Parses the input string and attempts to ensure it is valid."""
|
"""Parses the input string and attempts to ensure it is valid."""
|
||||||
# TODO: this does not reject an empty localpart or an overly-long string.
|
# TODO: this does not reject an empty localpart or an overly-long string.
|
||||||
# See https://spec.matrix.org/v1.2/appendices/#identifier-grammar
|
# See https://spec.matrix.org/v1.2/appendices/#identifier-grammar
|
||||||
@@ -329,6 +333,35 @@ class UserID(DomainSpecificString):
|
|||||||
|
|
||||||
SIGIL = "@"
|
SIGIL = "@"
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def is_valid(cls: Type[DS], s: str, **kwargs: Any) -> bool:
|
||||||
|
""""""
|
||||||
|
"""Parses the user id str and attempts to ensure it is valid per the spec.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
allow_historical_mxids: True to allow historical mxids, which can
|
||||||
|
include all printable ASCII chars minus `:`
|
||||||
|
Returns:
|
||||||
|
False if the user ID is invalid per the spec
|
||||||
|
"""
|
||||||
|
allow_historical_mxids = kwargs.get("allow_historical_mxids", False)
|
||||||
|
|
||||||
|
is_valid = DomainSpecificString.is_valid(s)
|
||||||
|
|
||||||
|
if len(s.encode("utf-8")) > 255:
|
||||||
|
logger.warn(
|
||||||
|
f"User ID {s} has more than 255 bytes and is invalid per the spec"
|
||||||
|
)
|
||||||
|
is_valid = False
|
||||||
|
obj = UserID.from_string(s)
|
||||||
|
if contains_invalid_mxid_characters(obj.localpart, allow_historical_mxids):
|
||||||
|
logger.warn(
|
||||||
|
f"localpart of User ID {s} contains invalid characters per the spec"
|
||||||
|
)
|
||||||
|
is_valid = False
|
||||||
|
|
||||||
|
return is_valid
|
||||||
|
|
||||||
|
|
||||||
@attr.s(slots=True, frozen=True, repr=False)
|
@attr.s(slots=True, frozen=True, repr=False)
|
||||||
class RoomAlias(DomainSpecificString):
|
class RoomAlias(DomainSpecificString):
|
||||||
@@ -355,22 +388,30 @@ MXID_LOCALPART_ALLOWED_CHARACTERS = set(
|
|||||||
"_-./=+" + string.ascii_lowercase + string.digits
|
"_-./=+" + string.ascii_lowercase + string.digits
|
||||||
)
|
)
|
||||||
|
|
||||||
|
MXID_HISTORICAL_LOCALPART_ALLOWED_CHARACTERS = set(string.printable.replace(":", ""))
|
||||||
|
|
||||||
# Guest user IDs are purely numeric.
|
# Guest user IDs are purely numeric.
|
||||||
GUEST_USER_ID_PATTERN = re.compile(r"^\d+$")
|
GUEST_USER_ID_PATTERN = re.compile(r"^\d+$")
|
||||||
|
|
||||||
|
|
||||||
def contains_invalid_mxid_characters(localpart: str) -> bool:
|
def contains_invalid_mxid_characters(
|
||||||
|
localpart: str, allow_historical_mxids: Optional[bool] = False
|
||||||
|
) -> bool:
|
||||||
"""Check for characters not allowed in an mxid or groupid localpart
|
"""Check for characters not allowed in an mxid or groupid localpart
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
localpart: the localpart to be checked
|
localpart: the localpart to be checked
|
||||||
use_extended_character_set: True to use the extended allowed characters
|
allow_historical_mxids: True to allow historical mxids, which can
|
||||||
from MSC4009.
|
include all printable ASCII chars minus `:`
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
True if there are any naughty characters
|
True if there are any naughty characters
|
||||||
"""
|
"""
|
||||||
return any(c not in MXID_LOCALPART_ALLOWED_CHARACTERS for c in localpart)
|
|
||||||
|
if allow_historical_mxids:
|
||||||
|
allowed_characters = MXID_HISTORICAL_LOCALPART_ALLOWED_CHARACTERS
|
||||||
|
else:
|
||||||
|
allowed_characters = MXID_LOCALPART_ALLOWED_CHARACTERS
|
||||||
|
return any(c not in allowed_characters for c in localpart)
|
||||||
|
|
||||||
|
|
||||||
UPPER_CASE_PATTERN = re.compile(b"[A-Z_]")
|
UPPER_CASE_PATTERN = re.compile(b"[A-Z_]")
|
||||||
|
|||||||
Reference in New Issue
Block a user