Respond with useful error codes when Content-Length header/s are invalid (#19212)

Related to https://github.com/element-hq/synapse/issues/17035, when
Synapse receives a request that is larger than the maximum size allowed,
it aborts the connection without ever sending back a HTTP response.
I dug into our usage of twisted and how best to try and report such an
error and this is what I came up with.

It would be ideal to be able to report the status from within
`handleContentChunk` but that is called too early on in the twisted http
handling code, before things have been setup enough to be able to
properly write a response.
I tested this change out locally (both with C-S and S-S apis) and they
do receive a 413 response now in addition to the connection being
closed.

Hopefully this will aid in being able to quickly detect when
https://github.com/element-hq/synapse/issues/17035 is occurring as the
current situation makes it very hard to narrow things down to that
specific issue without making a lot of assumptions.

This PR also responds with more meaningful error codes now in the case
of:
- multiple `Content-Length` headers
- invalid `Content-Length` header value
- request content size being larger than the `Content-Length` value

### Pull Request Checklist

<!-- Please read
https://element-hq.github.io/synapse/latest/development/contributing_guide.html
before submitting your pull request -->

* [X] Pull request is based on the develop branch
* [X] Pull request includes a [changelog
file](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#changelog).
The entry should:
- Be a short description of your change which makes sense to users.
"Fixed a bug that prevented receiving messages from other servers."
instead of "Moved X method from `EventStore` to `EventWorkerStore`.".
  - Use markdown where necessary, mostly for `code blocks`.
  - End with either a period (.) or an exclamation mark (!).
  - Start with a capital letter.
- Feel free to credit yourself, by adding a sentence "Contributed by
@github_username." or "Contributed by [Your Name]." to the end of the
entry.
* [X] [Code
style](https://element-hq.github.io/synapse/latest/code_style.html) is
correct (run the
[linters](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#run-the-linters))

---------

Co-authored-by: Eric Eastwood <erice@element.io>
This commit is contained in:
Devon Hudson
2025-12-08 21:39:18 +00:00
committed by GitHub
parent 09fd2645c2
commit 8b0083cad9
10 changed files with 336 additions and 50 deletions

1
changelog.d/19212.misc Normal file
View File

@@ -0,0 +1 @@
Respond with useful error codes with `Content-Length` header/s are invalid.

View File

@@ -29,6 +29,19 @@ from typing import Final
# the max size of a (canonical-json-encoded) event # the max size of a (canonical-json-encoded) event
MAX_PDU_SIZE = 65536 MAX_PDU_SIZE = 65536
# The maximum allowed size of an HTTP request.
# Other than media uploads, the biggest request we expect to see is a fully-loaded
# /federation/v1/send request.
#
# The main thing in such a request is up to 50 PDUs, and up to 100 EDUs. PDUs are
# limited to 65536 bytes (possibly slightly more if the sender didn't use canonical
# json encoding); there is no specced limit to EDUs (see
# https://github.com/matrix-org/matrix-doc/issues/3121).
#
# in short, we somewhat arbitrarily limit requests to 200 * 64K (about 12.5M)
#
MAX_REQUEST_SIZE = 200 * MAX_PDU_SIZE
# Max/min size of ints in canonical JSON # Max/min size of ints in canonical JSON
CANONICALJSON_MAX_INT = (2**53) - 1 CANONICALJSON_MAX_INT = (2**53) - 1
CANONICALJSON_MIN_INT = -CANONICALJSON_MAX_INT CANONICALJSON_MIN_INT = -CANONICALJSON_MAX_INT

View File

@@ -59,7 +59,7 @@ from twisted.python.threadpool import ThreadPool
from twisted.web.resource import Resource from twisted.web.resource import Resource
import synapse.util.caches import synapse.util.caches
from synapse.api.constants import MAX_PDU_SIZE from synapse.api.constants import MAX_REQUEST_SIZE
from synapse.app import check_bind_error from synapse.app import check_bind_error
from synapse.config import ConfigError from synapse.config import ConfigError
from synapse.config._base import format_config_error from synapse.config._base import format_config_error
@@ -895,17 +895,8 @@ def sdnotify(state: bytes) -> None:
def max_request_body_size(config: HomeServerConfig) -> int: def max_request_body_size(config: HomeServerConfig) -> int:
"""Get a suitable maximum size for incoming HTTP requests""" """Get a suitable maximum size for incoming HTTP requests"""
# Other than media uploads, the biggest request we expect to see is a fully-loaded # Baseline default for any request that isn't configured in the homeserver config
# /federation/v1/send request. max_request_size = MAX_REQUEST_SIZE
#
# The main thing in such a request is up to 50 PDUs, and up to 100 EDUs. PDUs are
# limited to 65536 bytes (possibly slightly more if the sender didn't use canonical
# json encoding); there is no specced limit to EDUs (see
# https://github.com/matrix-org/matrix-doc/issues/3121).
#
# in short, we somewhat arbitrarily limit requests to 200 * 64K (about 12.5M)
#
max_request_size = 200 * MAX_PDU_SIZE
# if we have a media repo enabled, we may need to allow larger uploads than that # if we have a media repo enabled, we may need to allow larger uploads than that
if config.media.can_load_media_repo: if config.media.can_load_media_repo:

View File

@@ -19,6 +19,7 @@
# #
# #
import contextlib import contextlib
import json
import logging import logging
import time import time
from http import HTTPStatus from http import HTTPStatus
@@ -36,6 +37,7 @@ from twisted.web.http import HTTPChannel
from twisted.web.resource import IResource, Resource from twisted.web.resource import IResource, Resource
from twisted.web.server import Request from twisted.web.server import Request
from synapse.api.errors import Codes, SynapseError
from synapse.config.server import ListenerConfig from synapse.config.server import ListenerConfig
from synapse.http import get_request_user_agent, redact_uri from synapse.http import get_request_user_agent, redact_uri
from synapse.http.proxy import ProxySite from synapse.http.proxy import ProxySite
@@ -59,6 +61,10 @@ logger = logging.getLogger(__name__)
_next_request_seq = 0 _next_request_seq = 0
class ContentLengthError(SynapseError):
"""Raised when content-length validation fails."""
class SynapseRequest(Request): class SynapseRequest(Request):
"""Class which encapsulates an HTTP request to synapse. """Class which encapsulates an HTTP request to synapse.
@@ -144,36 +150,150 @@ class SynapseRequest(Request):
self.synapse_site.site_tag, self.synapse_site.site_tag,
) )
def _respond_with_error(self, synapse_error: SynapseError) -> None:
"""Send an error response and close the connection."""
self.setResponseCode(synapse_error.code)
error_response_bytes = json.dumps(synapse_error.error_dict(None)).encode()
self.responseHeaders.setRawHeaders(b"Content-Type", [b"application/json"])
self.responseHeaders.setRawHeaders(
b"Content-Length", [f"{len(error_response_bytes)}"]
)
self.write(error_response_bytes)
self.loseConnection()
def _get_content_length_from_headers(self) -> int | None:
"""Attempts to obtain the `Content-Length` value from the request's headers.
Returns:
Content length as `int` if present. Otherwise `None`.
Raises:
ContentLengthError: if multiple `Content-Length` headers are present or the
value is not an `int`.
"""
content_length_headers = self.requestHeaders.getRawHeaders(b"Content-Length")
if content_length_headers is None:
return None
# If there are multiple `Content-Length` headers return an error.
# We don't want to even try to pick the right one if there are multiple
# as we could run into problems similar to request smuggling vulnerabilities
# which rely on the mismatch of how different systems interpret information.
if len(content_length_headers) != 1:
raise ContentLengthError(
HTTPStatus.BAD_REQUEST,
"Multiple Content-Length headers received",
Codes.UNKNOWN,
)
try:
return int(content_length_headers[0])
except (ValueError, TypeError):
raise ContentLengthError(
HTTPStatus.BAD_REQUEST,
"Content-Length header value is not a valid integer",
Codes.UNKNOWN,
)
def _validate_content_length(self) -> None:
"""Validate Content-Length header and actual content size.
Raises:
ContentLengthError: If validation fails.
"""
# we should have a `content` by now.
assert self.content, "_validate_content_length() called before gotLength()"
content_length = self._get_content_length_from_headers()
if content_length is None:
return
actual_content_length = self.content.tell()
if content_length > self._max_request_body_size:
logger.info(
"Rejecting request from %s because Content-Length %d exceeds maximum size %d: %s %s",
self.client,
content_length,
self._max_request_body_size,
self.get_method(),
self.get_redacted_uri(),
)
raise ContentLengthError(
HTTPStatus.REQUEST_ENTITY_TOO_LARGE,
f"Request content is too large (>{self._max_request_body_size})",
Codes.TOO_LARGE,
)
if content_length != actual_content_length:
comparison = (
"smaller" if content_length < actual_content_length else "larger"
)
logger.info(
"Rejecting request from %s because Content-Length %d is %s than the request content size %d: %s %s",
self.client,
content_length,
comparison,
actual_content_length,
self.get_method(),
self.get_redacted_uri(),
)
raise ContentLengthError(
HTTPStatus.BAD_REQUEST,
f"Rejecting request as the Content-Length header value {content_length} "
f"is {comparison} than the actual request content size {actual_content_length}",
Codes.UNKNOWN,
)
# Twisted machinery: this method is called by the Channel once the full request has # Twisted machinery: this method is called by the Channel once the full request has
# been received, to dispatch the request to a resource. # been received, to dispatch the request to a resource.
#
# We're patching Twisted to bail/abort early when we see someone trying to upload
# `multipart/form-data` so we can avoid Twisted parsing the entire request body into
# in-memory (specific problem of this specific `Content-Type`). This protects us
# from an attacker uploading something bigger than the available RAM and crashing
# the server with a `MemoryError`, or carefully block just enough resources to cause
# all other requests to fail.
#
# FIXME: This can be removed once we Twisted releases a fix and we update to a
# version that is patched
def requestReceived(self, command: bytes, path: bytes, version: bytes) -> None: def requestReceived(self, command: bytes, path: bytes, version: bytes) -> None:
# In the case of a Content-Length header being present, and it's value being too
# large, throw a proper error to make debugging issues due to overly large requests much
# easier. Currently we handle such cases in `handleContentChunk` and abort the
# connection without providing a proper HTTP response.
#
# Attempting to write an HTTP response from within `handleContentChunk` does not
# work, so the code here has been added to at least provide a response in the
# case of the Content-Length header being present.
self.method, self.uri = command, path
self.clientproto = version
try:
self._validate_content_length()
except ContentLengthError as e:
self._respond_with_error(e)
return
# We're patching Twisted to bail/abort early when we see someone trying to upload
# `multipart/form-data` so we can avoid Twisted parsing the entire request body into
# in-memory (specific problem of this specific `Content-Type`). This protects us
# from an attacker uploading something bigger than the available RAM and crashing
# the server with a `MemoryError`, or carefully block just enough resources to cause
# all other requests to fail.
#
# FIXME: This can be removed once Twisted releases a fix and we update to a
# version that is patched
# See: https://github.com/element-hq/synapse/security/advisories/GHSA-rfq8-j7rh-8hf2
if command == b"POST": if command == b"POST":
ctype = self.requestHeaders.getRawHeaders(b"content-type") ctype = self.requestHeaders.getRawHeaders(b"content-type")
if ctype and b"multipart/form-data" in ctype[0]: if ctype and b"multipart/form-data" in ctype[0]:
self.method, self.uri = command, path logger.warning(
self.clientproto = version "Aborting connection from %s because `content-type: multipart/form-data` is unsupported: %s %s",
self.client,
self.get_method(),
self.get_redacted_uri(),
)
self.code = HTTPStatus.UNSUPPORTED_MEDIA_TYPE.value self.code = HTTPStatus.UNSUPPORTED_MEDIA_TYPE.value
self.code_message = bytes( self.code_message = bytes(
HTTPStatus.UNSUPPORTED_MEDIA_TYPE.phrase, "ascii" HTTPStatus.UNSUPPORTED_MEDIA_TYPE.phrase, "ascii"
) )
self.responseHeaders.setRawHeaders(b"content-length", [b"0"])
logger.warning( # FIXME: Return a better error response here similar to the
"Aborting connection from %s because `content-type: multipart/form-data` is unsupported: %s %s", # `error_response_json` returned in other code paths here.
self.client, self.responseHeaders.setRawHeaders(b"Content-Length", [b"0"])
command,
path,
)
self.write(b"") self.write(b"")
self.loseConnection() self.loseConnection()
return return

View File

@@ -22,6 +22,7 @@
from twisted.internet.address import IPv6Address from twisted.internet.address import IPv6Address
from twisted.internet.testing import MemoryReactor, StringTransport from twisted.internet.testing import MemoryReactor, StringTransport
from synapse.app._base import max_request_body_size
from synapse.app.homeserver import SynapseHomeServer from synapse.app.homeserver import SynapseHomeServer
from synapse.server import HomeServer from synapse.server import HomeServer
from synapse.util.clock import Clock from synapse.util.clock import Clock
@@ -143,3 +144,104 @@ class SynapseRequestTestCase(HomeserverTestCase):
# we should get a 415 # we should get a 415
self.assertRegex(transport.value().decode(), r"^HTTP/1\.1 415 ") self.assertRegex(transport.value().decode(), r"^HTTP/1\.1 415 ")
def test_content_length_too_large(self) -> None:
"""HTTP requests with Content-Length exceeding max size should be rejected with 413"""
self.hs.start_listening()
# find the HTTP server which is configured to listen on port 0
(port, factory, _backlog, interface) = self.reactor.tcpServers[0]
self.assertEqual(interface, "::")
self.assertEqual(port, 0)
# complete the connection and wire it up to a fake transport
client_address = IPv6Address("TCP", "::1", 2345)
protocol = factory.buildProtocol(client_address)
transport = StringTransport()
protocol.makeConnection(transport)
# Send a request with Content-Length header that exceeds the limit.
# Default max is 50MB (from media max_upload_size), so send something larger.
oversized_length = 1 + max_request_body_size(self.hs.config)
protocol.dataReceived(
b"POST / HTTP/1.1\r\n"
b"Connection: close\r\n"
b"Content-Length: " + str(oversized_length).encode() + b"\r\n"
b"\r\n"
b"" + b"x" * oversized_length + b"\r\n"
b"\r\n"
)
# Advance the reactor to process the request
while not transport.disconnecting:
self.reactor.advance(1)
# We should get a 413 Content Too Large
response = transport.value().decode()
self.assertRegex(response, r"^HTTP/1\.1 413 ")
self.assertSubstring("M_TOO_LARGE", response)
def test_too_many_content_length_headers(self) -> None:
"""HTTP requests with multiple Content-Length headers should be rejected with 400"""
self.hs.start_listening()
# find the HTTP server which is configured to listen on port 0
(port, factory, _backlog, interface) = self.reactor.tcpServers[0]
self.assertEqual(interface, "::")
self.assertEqual(port, 0)
# complete the connection and wire it up to a fake transport
client_address = IPv6Address("TCP", "::1", 2345)
protocol = factory.buildProtocol(client_address)
transport = StringTransport()
protocol.makeConnection(transport)
protocol.dataReceived(
b"POST / HTTP/1.1\r\n"
b"Connection: close\r\n"
b"Content-Length: " + str(5).encode() + b"\r\n"
b"Content-Length: " + str(5).encode() + b"\r\n"
b"\r\n"
b"" + b"xxxxx" + b"\r\n"
b"\r\n"
)
# Advance the reactor to process the request
while not transport.disconnecting:
self.reactor.advance(1)
# We should get a 400
response = transport.value().decode()
self.assertRegex(response, r"^HTTP/1\.1 400 ")
def test_invalid_content_length_headers(self) -> None:
"""HTTP requests with invalid Content-Length header should be rejected with 400"""
self.hs.start_listening()
# find the HTTP server which is configured to listen on port 0
(port, factory, _backlog, interface) = self.reactor.tcpServers[0]
self.assertEqual(interface, "::")
self.assertEqual(port, 0)
# complete the connection and wire it up to a fake transport
client_address = IPv6Address("TCP", "::1", 2345)
protocol = factory.buildProtocol(client_address)
transport = StringTransport()
protocol.makeConnection(transport)
protocol.dataReceived(
b"POST / HTTP/1.1\r\n"
b"Connection: close\r\n"
b"Content-Length: eight\r\n"
b"\r\n"
b"" + b"xxxxx" + b"\r\n"
b"\r\n"
)
# Advance the reactor to process the request
while not transport.disconnecting:
self.reactor.advance(1)
# We should get a 400
response = transport.value().decode()
self.assertRegex(response, r"^HTTP/1\.1 400 ")

View File

@@ -1728,9 +1728,6 @@ class UsernamePickerTestCase(HomeserverTestCase):
content_is_form=True, content_is_form=True,
custom_headers=[ custom_headers=[
("Cookie", "username_mapping_session=" + session_id), ("Cookie", "username_mapping_session=" + session_id),
# old versions of twisted don't do form-parsing without a valid
# content-length header.
("Content-Length", str(len(content))),
], ],
) )
self.assertEqual(chan.code, 302, chan.result) self.assertEqual(chan.code, 302, chan.result)
@@ -1818,9 +1815,6 @@ class UsernamePickerTestCase(HomeserverTestCase):
content_is_form=True, content_is_form=True,
custom_headers=[ custom_headers=[
("Cookie", "username_mapping_session=" + session_id), ("Cookie", "username_mapping_session=" + session_id),
# old versions of twisted don't do form-parsing without a valid
# content-length header.
("Content-Length", str(len(content))),
], ],
) )
self.assertEqual(chan.code, 302, chan.result) self.assertEqual(chan.code, 302, chan.result)

View File

@@ -2590,7 +2590,6 @@ class AuthenticatedMediaTestCase(unittest.HomeserverTestCase):
self.tok, self.tok,
shorthand=False, shorthand=False,
content_type=b"image/png", content_type=b"image/png",
custom_headers=[("Content-Length", str(67))],
) )
self.assertEqual(channel.code, 200) self.assertEqual(channel.code, 200)
res = channel.json_body.get("content_uri") res = channel.json_body.get("content_uri")
@@ -2750,7 +2749,6 @@ class AuthenticatedMediaTestCase(unittest.HomeserverTestCase):
self.tok, self.tok,
shorthand=False, shorthand=False,
content_type=b"image/png", content_type=b"image/png",
custom_headers=[("Content-Length", str(67))],
) )
self.assertEqual(channel.code, 200) self.assertEqual(channel.code, 200)
res = channel.json_body.get("content_uri") res = channel.json_body.get("content_uri")
@@ -2909,7 +2907,6 @@ class MediaUploadLimits(unittest.HomeserverTestCase):
access_token=self.tok, access_token=self.tok,
shorthand=False, shorthand=False,
content_type=b"text/plain", content_type=b"text/plain",
custom_headers=[("Content-Length", str(size))],
) )
def test_upload_under_limit(self) -> None: def test_upload_under_limit(self) -> None:
@@ -3074,7 +3071,6 @@ class MediaUploadLimitsModuleOverrides(unittest.HomeserverTestCase):
access_token=tok, access_token=tok,
shorthand=False, shorthand=False,
content_type=b"text/plain", content_type=b"text/plain",
custom_headers=[("Content-Length", str(size))],
) )
def test_upload_under_limit(self) -> None: def test_upload_under_limit(self) -> None:

View File

@@ -612,7 +612,6 @@ class RestHelper:
filename: The filename of the media to be uploaded filename: The filename of the media to be uploaded
expect_code: The return code to expect from attempting to upload the media expect_code: The return code to expect from attempting to upload the media
""" """
image_length = len(image_data)
path = "/_matrix/media/r0/upload?filename=%s" % (filename,) path = "/_matrix/media/r0/upload?filename=%s" % (filename,)
channel = make_request( channel = make_request(
self.reactor, self.reactor,
@@ -621,7 +620,6 @@ class RestHelper:
path, path,
content=image_data, content=image_data,
access_token=tok, access_token=tok,
custom_headers=[("Content-Length", str(image_length))],
) )
assert channel.code == expect_code, "Expected: %d, got: %d, resp: %r" % ( assert channel.code == expect_code, "Expected: %d, got: %d, resp: %r" % (

View File

@@ -81,6 +81,7 @@ from twisted.web.http_headers import Headers
from twisted.web.resource import IResource from twisted.web.resource import IResource
from twisted.web.server import Request, Site from twisted.web.server import Request, Site
from synapse.api.constants import MAX_REQUEST_SIZE
from synapse.config.database import DatabaseConnectionConfig from synapse.config.database import DatabaseConnectionConfig
from synapse.config.homeserver import HomeServerConfig from synapse.config.homeserver import HomeServerConfig
from synapse.events.auto_accept_invites import InviteAutoAccepter from synapse.events.auto_accept_invites import InviteAutoAccepter
@@ -241,7 +242,6 @@ class FakeChannel:
def loseConnection(self) -> None: def loseConnection(self) -> None:
self.unregisterProducer() self.unregisterProducer()
self.transport.loseConnection()
# Type ignore: mypy doesn't like the fact that producer isn't an IProducer. # Type ignore: mypy doesn't like the fact that producer isn't an IProducer.
def registerProducer(self, producer: IProducer, streaming: bool) -> None: def registerProducer(self, producer: IProducer, streaming: bool) -> None:
@@ -428,18 +428,29 @@ def make_request(
channel = FakeChannel(site, reactor, ip=client_ip) channel = FakeChannel(site, reactor, ip=client_ip)
req = request(channel, site, our_server_name="test_server") req = request(
channel,
site,
our_server_name="test_server",
max_request_body_size=MAX_REQUEST_SIZE,
)
channel.request = req channel.request = req
req.content = BytesIO(content) req.content = BytesIO(content)
# Twisted expects to be at the end of the content when parsing the request. # Twisted expects to be at the end of the content when parsing the request.
req.content.seek(0, SEEK_END) req.content.seek(0, SEEK_END)
# Old version of Twisted (<20.3.0) have issues with parsing x-www-form-urlencoded # If `Content-Length` was passed in as a custom header, don't automatically add it
# bodies if the Content-Length header is missing # here.
req.requestHeaders.addRawHeader( if custom_headers is None or not any(
b"Content-Length", str(len(content)).encode("ascii") (k if isinstance(k, bytes) else k.encode("ascii")) == b"Content-Length"
) for k, _ in custom_headers
):
# Old version of Twisted (<20.3.0) have issues with parsing x-www-form-urlencoded
# bodies if the Content-Length header is missing
req.requestHeaders.addRawHeader(
b"Content-Length", str(len(content)).encode("ascii")
)
if access_token: if access_token:
req.requestHeaders.addRawHeader( req.requestHeaders.addRawHeader(

View File

@@ -212,6 +212,66 @@ class JsonResourceTests(unittest.TestCase):
self.assertEqual(channel.code, 200) self.assertEqual(channel.code, 200)
self.assertNotIn("body", channel.result) self.assertNotIn("body", channel.result)
def test_content_larger_than_content_length(self) -> None:
"""
HTTP requests with content size exceeding Content-Length should be rejected with 400.
"""
def _callback(
request: SynapseRequest, **kwargs: object
) -> tuple[int, JsonDict]:
return 200, {}
res = JsonResource(self.homeserver)
res.register_paths(
"POST", [re.compile("^/_matrix/foo$")], _callback, "test_servlet"
)
channel = make_request(
self.reactor,
FakeSite(res, self.reactor),
b"POST",
b"/_matrix/foo",
{},
# Set the `Content-Length` value to be smaller than the actual content size
custom_headers=[("Content-Length", "1")],
# The request should disconnect early so don't await the result
await_result=False,
)
self.reactor.advance(0.1)
self.assertEqual(channel.code, 400)
def test_content_smaller_than_content_length(self) -> None:
"""
HTTP requests with content size smaller than Content-Length should be rejected with 400.
"""
def _callback(
request: SynapseRequest, **kwargs: object
) -> tuple[int, JsonDict]:
return 200, {}
res = JsonResource(self.homeserver)
res.register_paths(
"POST", [re.compile("^/_matrix/foo$")], _callback, "test_servlet"
)
channel = make_request(
self.reactor,
FakeSite(res, self.reactor),
b"POST",
b"/_matrix/foo",
{},
# Set the `Content-Length` value to be larger than the actual content size
custom_headers=[("Content-Length", "10")],
# The request should disconnect early so don't await the result
await_result=False,
)
self.reactor.advance(0.1)
self.assertEqual(channel.code, 400)
class OptionsResourceTests(unittest.TestCase): class OptionsResourceTests(unittest.TestCase):
def setUp(self) -> None: def setUp(self) -> None: