From 7d99414edf2c5c7e602a88c72245add665e6afb4 Mon Sep 17 00:00:00 2001 From: Hannes Lerchl Date: Wed, 15 Jun 2022 18:45:16 +0200 Subject: [PATCH 001/178] Replace pyjwt with authlib in `org.matrix.login.jwt` (#13011) --- changelog.d/13011.misc | 1 + docs/jwt.md | 35 +++++++++----- .../configuration/config_documentation.md | 6 ++- poetry.lock | 8 ++-- pyproject.toml | 7 +-- synapse/config/jwt.py | 10 ++-- synapse/rest/client/login.py | 46 +++++++++++++++---- tests/rest/client/test_login.py | 44 +++++++++--------- 8 files changed, 100 insertions(+), 57 deletions(-) create mode 100644 changelog.d/13011.misc diff --git a/changelog.d/13011.misc b/changelog.d/13011.misc new file mode 100644 index 0000000000..4da223219f --- /dev/null +++ b/changelog.d/13011.misc @@ -0,0 +1 @@ +Replaced usage of PyJWT with methods from Authlib in `org.matrix.login.jwt`. Contributed by Hannes Lerchl. diff --git a/docs/jwt.md b/docs/jwt.md index 346daf78ad..8f859d59a6 100644 --- a/docs/jwt.md +++ b/docs/jwt.md @@ -37,19 +37,19 @@ As with other login types, there are additional fields (e.g. `device_id` and ## Preparing Synapse The JSON Web Token integration in Synapse uses the -[`PyJWT`](https://pypi.org/project/pyjwt/) library, which must be installed +[`Authlib`](https://docs.authlib.org/en/latest/index.html) library, which must be installed as follows: - * The relevant libraries are included in the Docker images and Debian packages - provided by `matrix.org` so no further action is needed. +* The relevant libraries are included in the Docker images and Debian packages + provided by `matrix.org` so no further action is needed. - * If you installed Synapse into a virtualenv, run `/path/to/env/bin/pip - install synapse[pyjwt]` to install the necessary dependencies. +* If you installed Synapse into a virtualenv, run `/path/to/env/bin/pip + install synapse[jwt]` to install the necessary dependencies. - * For other installation mechanisms, see the documentation provided by the - maintainer. +* For other installation mechanisms, see the documentation provided by the + maintainer. -To enable the JSON web token integration, you should then add an `jwt_config` section +To enable the JSON web token integration, you should then add a `jwt_config` section to your configuration file (or uncomment the `enabled: true` line in the existing section). See [sample_config.yaml](./sample_config.yaml) for some sample settings. @@ -57,7 +57,7 @@ sample settings. ## How to test JWT as a developer Although JSON Web Tokens are typically generated from an external server, the -examples below use [PyJWT](https://pyjwt.readthedocs.io/en/latest/) directly. +example below uses a locally generated JWT. 1. Configure Synapse with JWT logins, note that this example uses a pre-shared secret and an algorithm of HS256: @@ -70,10 +70,21 @@ examples below use [PyJWT](https://pyjwt.readthedocs.io/en/latest/) directly. ``` 2. Generate a JSON web token: - ```bash - $ pyjwt --key=my-secret-token --alg=HS256 encode sub=test-user - eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ0ZXN0LXVzZXIifQ.Ag71GT8v01UO3w80aqRPTeuVPBIBZkYhNTJJ-_-zQIc + You can use the following short Python snippet to generate a JWT + protected by an HMAC. + Take care that the `secret` and the algorithm given in the `header` match + the entries from `jwt_config` above. 
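+   Note that Authlib's `jwt.encode` returns the token as `bytes`, which is why
+   the example below decodes the result to ASCII before printing it.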
+ + ```python + from authlib.jose import jwt + + header = {"alg": "HS256"} + payload = {"sub": "user1", "aud": ["audience"]} + secret = "my-secret-token" + result = jwt.encode(header, payload, secret) + print(result.decode("ascii")) ``` + 3. Query for the login types and ensure `org.matrix.login.jwt` is there: ```bash diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 392ae80a75..e88f68d2b8 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -2946,8 +2946,10 @@ Additional sub-options for this setting include: tokens. Defaults to false. * `secret`: This is either the private shared secret or the public key used to decode the contents of the JSON web token. Required if `enabled` is set to true. -* `algorithm`: The algorithm used to sign the JSON web token. Supported algorithms are listed at - https://pyjwt.readthedocs.io/en/latest/algorithms.html Required if `enabled` is set to true. +* `algorithm`: The algorithm used to sign (or HMAC) the JSON web token. + Supported algorithms are listed + [here (section JWS)](https://docs.authlib.org/en/latest/specs/rfc7518.html). + Required if `enabled` is set to true. * `subject_claim`: Name of the claim containing a unique identifier for the user. Optional, defaults to `sub`. * `issuer`: The issuer to validate the "iss" claim against. Optional. If provided the diff --git a/poetry.lock b/poetry.lock index 6a67f59bca..849e8a7a99 100644 --- a/poetry.lock +++ b/poetry.lock @@ -815,7 +815,7 @@ python-versions = ">=3.5" name = "pyjwt" version = "2.4.0" description = "JSON Web Token implementation in Python" -category = "main" +category = "dev" optional = false python-versions = ">=3.6" @@ -1546,9 +1546,9 @@ docs = ["sphinx", "repoze.sphinx.autointerface"] test = ["zope.i18nmessageid", "zope.testing", "zope.testrunner"] [extras] -all = ["matrix-synapse-ldap3", "psycopg2", "psycopg2cffi", "psycopg2cffi-compat", "pysaml2", "authlib", "lxml", "sentry-sdk", "jaeger-client", "opentracing", "pyjwt", "txredisapi", "hiredis", "Pympler"] +all = ["matrix-synapse-ldap3", "psycopg2", "psycopg2cffi", "psycopg2cffi-compat", "pysaml2", "authlib", "lxml", "sentry-sdk", "jaeger-client", "opentracing", "txredisapi", "hiredis", "Pympler"] cache_memory = ["Pympler"] -jwt = ["pyjwt"] +jwt = ["authlib"] matrix-synapse-ldap3 = ["matrix-synapse-ldap3"] oidc = ["authlib"] opentracing = ["jaeger-client", "opentracing"] @@ -1563,7 +1563,7 @@ url_preview = ["lxml"] [metadata] lock-version = "1.1" python-versions = "^3.7.1" -content-hash = "37bd4bccfdb5a869635f2135a85bea4a0729af7375a27de153b4fd9a4aebc195" +content-hash = "73882e279e0379482f2fc7414cb71addfd408ca48ad508ff8a02b0cb544762af" [metadata.files] attrs = [ diff --git a/pyproject.toml b/pyproject.toml index 85c2c9534f..44aa775c33 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -175,7 +175,6 @@ lxml = { version = ">=4.2.0", optional = true } sentry-sdk = { version = ">=0.7.2", optional = true } opentracing = { version = ">=2.2.0", optional = true } jaeger-client = { version = ">=4.0.0", optional = true } -pyjwt = { version = ">=1.6.4", optional = true } txredisapi = { version = ">=1.4.7", optional = true } hiredis = { version = "*", optional = true } Pympler = { version = "*", optional = true } @@ -196,7 +195,7 @@ systemd = ["systemd-python"] url_preview = ["lxml"] sentry = ["sentry-sdk"] opentracing = ["jaeger-client", "opentracing"] -jwt = ["pyjwt"] +jwt = ["authlib"] # hiredis is not a 
*strict* dependency, but it makes things much faster. # (if it is not installed, we fall back to slow code.) redis = ["txredisapi", "hiredis"] @@ -222,7 +221,7 @@ all = [ "psycopg2", "psycopg2cffi", "psycopg2cffi-compat", # saml2 "pysaml2", - # oidc + # oidc and jwt "authlib", # url_preview "lxml", @@ -230,8 +229,6 @@ all = [ "sentry-sdk", # opentracing "jaeger-client", "opentracing", - # jwt - "pyjwt", # redis "txredisapi", "hiredis", # cache_memory diff --git a/synapse/config/jwt.py b/synapse/config/jwt.py index 7e3c764b2c..49aaca7cf6 100644 --- a/synapse/config/jwt.py +++ b/synapse/config/jwt.py @@ -18,10 +18,10 @@ from synapse.types import JsonDict from ._base import Config, ConfigError -MISSING_JWT = """Missing jwt library. This is required for jwt login. +MISSING_AUTHLIB = """Missing authlib library. This is required for jwt login. Install by running: - pip install pyjwt + pip install synapse[jwt] """ @@ -43,11 +43,11 @@ class JWTConfig(Config): self.jwt_audiences = jwt_config.get("audiences") try: - import jwt + from authlib.jose import JsonWebToken - jwt # To stop unused lint. + JsonWebToken # To stop unused lint. except ImportError: - raise ConfigError(MISSING_JWT) + raise ConfigError(MISSING_AUTHLIB) else: self.jwt_enabled = False self.jwt_secret = None diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py index cf4196ac0a..dd75e40f34 100644 --- a/synapse/rest/client/login.py +++ b/synapse/rest/client/login.py @@ -420,17 +420,31 @@ class LoginRestServlet(RestServlet): 403, "Token field for JWT is missing", errcode=Codes.FORBIDDEN ) - import jwt + from authlib.jose import JsonWebToken, JWTClaims + from authlib.jose.errors import BadSignatureError, InvalidClaimError, JoseError + + jwt = JsonWebToken([self.jwt_algorithm]) + claim_options = {} + if self.jwt_issuer is not None: + claim_options["iss"] = {"value": self.jwt_issuer, "essential": True} + if self.jwt_audiences is not None: + claim_options["aud"] = {"values": self.jwt_audiences, "essential": True} try: - payload = jwt.decode( + claims = jwt.decode( token, - self.jwt_secret, - algorithms=[self.jwt_algorithm], - issuer=self.jwt_issuer, - audience=self.jwt_audiences, + key=self.jwt_secret, + claims_cls=JWTClaims, + claims_options=claim_options, ) - except jwt.PyJWTError as e: + except BadSignatureError: + # We handle this case separately to provide a better error message + raise LoginError( + 403, + "JWT validation failed: Signature verification failed", + errcode=Codes.FORBIDDEN, + ) + except JoseError as e: # A JWT error occurred, return some info back to the client. 
raise LoginError( 403, @@ -438,7 +452,23 @@ class LoginRestServlet(RestServlet): errcode=Codes.FORBIDDEN, ) - user = payload.get(self.jwt_subject_claim, None) + try: + claims.validate(leeway=120) # allows 2 min of clock skew + + # Enforce the old behavior which is rolled out in productive + # servers: if the JWT contains an 'aud' claim but none is + # configured, the login attempt will fail + if claims.get("aud") is not None: + if self.jwt_audiences is None or len(self.jwt_audiences) == 0: + raise InvalidClaimError("aud") + except JoseError as e: + raise LoginError( + 403, + "JWT validation failed: %s" % (str(e),), + errcode=Codes.FORBIDDEN, + ) + + user = claims.get(self.jwt_subject_claim, None) if user is None: raise LoginError(403, "Invalid JWT", errcode=Codes.FORBIDDEN) diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py index f4ea1209d9..f6efa5fe37 100644 --- a/tests/rest/client/test_login.py +++ b/tests/rest/client/test_login.py @@ -14,7 +14,7 @@ import json import time import urllib.parse -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional from unittest.mock import Mock from urllib.parse import urlencode @@ -41,7 +41,7 @@ from tests.test_utils.html_parsers import TestHtmlParser from tests.unittest import HomeserverTestCase, override_config, skip_unless try: - import jwt + from authlib.jose import jwk, jwt HAS_JWT = True except ImportError: @@ -841,7 +841,7 @@ class CASTestCase(unittest.HomeserverTestCase): self.assertIn(b"SSO account deactivated", channel.result["body"]) -@skip_unless(HAS_JWT, "requires jwt") +@skip_unless(HAS_JWT, "requires authlib") class JWTTestCase(unittest.HomeserverTestCase): servlets = [ synapse.rest.admin.register_servlets_for_client_rest_resource, @@ -866,11 +866,9 @@ class JWTTestCase(unittest.HomeserverTestCase): return config def jwt_encode(self, payload: Dict[str, Any], secret: str = jwt_secret) -> str: - # PyJWT 2.0.0 changed the return type of jwt.encode from bytes to str. 
- result: Union[str, bytes] = jwt.encode(payload, secret, self.jwt_algorithm) - if isinstance(result, bytes): - return result.decode("ascii") - return result + header = {"alg": self.jwt_algorithm} + result: bytes = jwt.encode(header, payload, secret) + return result.decode("ascii") def jwt_login(self, *args: Any) -> FakeChannel: params = {"type": "org.matrix.login.jwt", "token": self.jwt_encode(*args)} @@ -902,7 +900,8 @@ class JWTTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.result["code"], b"403", channel.result) self.assertEqual(channel.json_body["errcode"], "M_FORBIDDEN") self.assertEqual( - channel.json_body["error"], "JWT validation failed: Signature has expired" + channel.json_body["error"], + "JWT validation failed: expired_token: The token is expired", ) def test_login_jwt_not_before(self) -> None: @@ -912,7 +911,7 @@ class JWTTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.json_body["errcode"], "M_FORBIDDEN") self.assertEqual( channel.json_body["error"], - "JWT validation failed: The token is not yet valid (nbf)", + "JWT validation failed: invalid_token: The token is not valid yet", ) def test_login_no_sub(self) -> None: @@ -934,7 +933,8 @@ class JWTTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.result["code"], b"403", channel.result) self.assertEqual(channel.json_body["errcode"], "M_FORBIDDEN") self.assertEqual( - channel.json_body["error"], "JWT validation failed: Invalid issuer" + channel.json_body["error"], + 'JWT validation failed: invalid_claim: Invalid claim "iss"', ) # Not providing an issuer. @@ -943,7 +943,7 @@ class JWTTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.json_body["errcode"], "M_FORBIDDEN") self.assertEqual( channel.json_body["error"], - 'JWT validation failed: Token is missing the "iss" claim', + 'JWT validation failed: missing_claim: Missing "iss" claim', ) def test_login_iss_no_config(self) -> None: @@ -965,7 +965,8 @@ class JWTTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.result["code"], b"403", channel.result) self.assertEqual(channel.json_body["errcode"], "M_FORBIDDEN") self.assertEqual( - channel.json_body["error"], "JWT validation failed: Invalid audience" + channel.json_body["error"], + 'JWT validation failed: invalid_claim: Invalid claim "aud"', ) # Not providing an audience. @@ -974,7 +975,7 @@ class JWTTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.json_body["errcode"], "M_FORBIDDEN") self.assertEqual( channel.json_body["error"], - 'JWT validation failed: Token is missing the "aud" claim', + 'JWT validation failed: missing_claim: Missing "aud" claim', ) def test_login_aud_no_config(self) -> None: @@ -983,7 +984,8 @@ class JWTTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.result["code"], b"403", channel.result) self.assertEqual(channel.json_body["errcode"], "M_FORBIDDEN") self.assertEqual( - channel.json_body["error"], "JWT validation failed: Invalid audience" + channel.json_body["error"], + 'JWT validation failed: invalid_claim: Invalid claim "aud"', ) def test_login_default_sub(self) -> None: @@ -1010,7 +1012,7 @@ class JWTTestCase(unittest.HomeserverTestCase): # The JWTPubKeyTestCase is a complement to JWTTestCase where we instead use # RSS256, with a public key configured in synapse as "jwt_secret", and tokens # signed by the private key. 
-@skip_unless(HAS_JWT, "requires jwt") +@skip_unless(HAS_JWT, "requires authlib") class JWTPubKeyTestCase(unittest.HomeserverTestCase): servlets = [ login.register_servlets, @@ -1071,11 +1073,11 @@ class JWTPubKeyTestCase(unittest.HomeserverTestCase): return config def jwt_encode(self, payload: Dict[str, Any], secret: str = jwt_privatekey) -> str: - # PyJWT 2.0.0 changed the return type of jwt.encode from bytes to str. - result: Union[bytes, str] = jwt.encode(payload, secret, "RS256") - if isinstance(result, bytes): - return result.decode("ascii") - return result + header = {"alg": "RS256"} + if secret.startswith("-----BEGIN RSA PRIVATE KEY-----"): + secret = jwk.dumps(secret, kty="RSA") + result: bytes = jwt.encode(header, payload, secret) + return result.decode("ascii") def jwt_login(self, *args: Any) -> FakeChannel: params = {"type": "org.matrix.login.jwt", "token": self.jwt_encode(*args)} From c95b04bb0e719d3f5de1714b442f95a39c6e3634 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 15 Jun 2022 17:55:20 +0100 Subject: [PATCH 002/178] Change default `sync_response_cache_duration` (#13042) --- changelog.d/13042.misc | 1 + docker/complement/conf/workers-shared-extra.yaml.j2 | 6 ++++++ docs/usage/configuration/config_documentation.md | 4 ++-- synapse/config/cache.py | 2 +- tests/utils.py | 2 +- 5 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 changelog.d/13042.misc diff --git a/changelog.d/13042.misc b/changelog.d/13042.misc new file mode 100644 index 0000000000..745d5fcf84 --- /dev/null +++ b/changelog.d/13042.misc @@ -0,0 +1 @@ +Set default `sync_response_cache_duration` to two minutes. diff --git a/docker/complement/conf/workers-shared-extra.yaml.j2 b/docker/complement/conf/workers-shared-extra.yaml.j2 index a5b1b6bb8b..7c6a0fd756 100644 --- a/docker/complement/conf/workers-shared-extra.yaml.j2 +++ b/docker/complement/conf/workers-shared-extra.yaml.j2 @@ -103,4 +103,10 @@ server_notices: system_mxid_avatar_url: "" room_name: "Server Alert" + +# Disable sync cache so that initial `/sync` requests are up-to-date. +caches: + sync_response_cache_duration: 0 + + {% include "shared-orig.yaml.j2" %} diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index e88f68d2b8..4e68801938 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1137,8 +1137,8 @@ Caching can be configured through the following sub-options: * `sync_response_cache_duration`: Controls how long the results of a /sync request are cached for after a successful response is returned. A higher duration can help clients with intermittent connections, at the cost of higher memory usage. - By default, this is zero, which means that sync responses are not cached - at all. + A value of zero means that sync responses are not cached. + Defaults to 2m. * `cache_autotuning` and its sub-options `max_cache_memory_usage`, `target_cache_memory_usage`, and `min_cache_ttl` work in conjunction with each other to maintain a balance between cache memory usage and cache entry availability. 
You must be using [jemalloc](https://github.com/matrix-org/synapse#help-synapse-is-slow-and-eats-all-my-ramcpu) diff --git a/synapse/config/cache.py b/synapse/config/cache.py index d0b491ea6c..63310c8d07 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -206,7 +206,7 @@ class CacheConfig(Config): self.cache_autotuning["min_cache_ttl"] = self.parse_duration(min_cache_ttl) self.sync_response_cache_duration = self.parse_duration( - cache_config.get("sync_response_cache_duration", 0) + cache_config.get("sync_response_cache_duration", "2m") ) def resize_all_caches(self) -> None: diff --git a/tests/utils.py b/tests/utils.py index 3059c453d5..cabb2c0dec 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -169,7 +169,7 @@ def default_config(name, parse=False): # disable user directory updates, because they get done in the # background, which upsets the test runner. "update_user_directory": False, - "caches": {"global_factor": 1}, + "caches": {"global_factor": 1, "sync_response_cache_duration": 0}, "listeners": [{"port": 0, "type": "http"}], } From 99d3931974e65865d1102ee79d7b7e2b017a3180 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Wed, 15 Jun 2022 18:58:23 +0100 Subject: [PATCH 003/178] Add more tests for room upgrades (#13074) Signed-off-by: Sean Quah --- changelog.d/13074.misc | 1 + tests/rest/client/test_upgrade_room.py | 83 ++++++++++++++++++++++++-- 2 files changed, 79 insertions(+), 5 deletions(-) create mode 100644 changelog.d/13074.misc diff --git a/changelog.d/13074.misc b/changelog.d/13074.misc new file mode 100644 index 0000000000..a502e44d92 --- /dev/null +++ b/changelog.d/13074.misc @@ -0,0 +1 @@ +Add more tests for room upgrades. diff --git a/tests/rest/client/test_upgrade_room.py b/tests/rest/client/test_upgrade_room.py index 98c1039d33..5e7bf97482 100644 --- a/tests/rest/client/test_upgrade_room.py +++ b/tests/rest/client/test_upgrade_room.py @@ -48,10 +48,14 @@ class UpgradeRoomTest(unittest.HomeserverTestCase): self.helper.join(self.room_id, self.other, tok=self.other_token) def _upgrade_room( - self, token: Optional[str] = None, room_id: Optional[str] = None + self, + token: Optional[str] = None, + room_id: Optional[str] = None, + expire_cache: bool = True, ) -> FakeChannel: - # We never want a cached response. - self.reactor.advance(5 * 60 + 1) + if expire_cache: + # We don't want a cached response. + self.reactor.advance(5 * 60 + 1) if room_id is None: room_id = self.room_id @@ -72,9 +76,24 @@ class UpgradeRoomTest(unittest.HomeserverTestCase): self.assertEqual(200, channel.code, channel.result) self.assertIn("replacement_room", channel.json_body) - def test_not_in_room(self) -> None: + new_room_id = channel.json_body["replacement_room"] + + # Check that the tombstone event points to the new room. + tombstone_event = self.get_success( + self.hs.get_storage_controllers().state.get_current_state_event( + self.room_id, EventTypes.Tombstone, "" + ) + ) + self.assertIsNotNone(tombstone_event) + self.assertEqual(new_room_id, tombstone_event.content["replacement_room"]) + + # Check that the new room exists. + room = self.get_success(self.store.get_room(new_room_id)) + self.assertIsNotNone(room) + + def test_never_in_room(self) -> None: """ - Upgrading a room should work fine. + A user who has never been in the room cannot upgrade the room. """ # The user isn't in the room. 
roomless = self.register_user("roomless", "pass") @@ -83,6 +102,16 @@ class UpgradeRoomTest(unittest.HomeserverTestCase): channel = self._upgrade_room(roomless_token) self.assertEqual(403, channel.code, channel.result) + def test_left_room(self) -> None: + """ + A user who is no longer in the room cannot upgrade the room. + """ + # Remove the user from the room. + self.helper.leave(self.room_id, self.creator, tok=self.creator_token) + + channel = self._upgrade_room(self.creator_token) + self.assertEqual(403, channel.code, channel.result) + def test_power_levels(self) -> None: """ Another user can upgrade the room if their power level is increased. @@ -297,3 +326,47 @@ class UpgradeRoomTest(unittest.HomeserverTestCase): self.assertEqual( create_event.content.get(EventContentFields.ROOM_TYPE), test_room_type ) + + def test_second_upgrade_from_same_user(self) -> None: + """A second room upgrade from the same user is deduplicated.""" + channel1 = self._upgrade_room() + self.assertEqual(200, channel1.code, channel1.result) + + channel2 = self._upgrade_room(expire_cache=False) + self.assertEqual(200, channel2.code, channel2.result) + + self.assertEqual( + channel1.json_body["replacement_room"], + channel2.json_body["replacement_room"], + ) + + def test_second_upgrade_after_delay(self) -> None: + """A second room upgrade is not deduplicated after some time has passed.""" + channel1 = self._upgrade_room() + self.assertEqual(200, channel1.code, channel1.result) + + channel2 = self._upgrade_room(expire_cache=True) + self.assertEqual(200, channel2.code, channel2.result) + + self.assertNotEqual( + channel1.json_body["replacement_room"], + channel2.json_body["replacement_room"], + ) + + def test_second_upgrade_from_different_user(self) -> None: + """A second room upgrade from a different user is blocked.""" + channel = self._upgrade_room() + self.assertEqual(200, channel.code, channel.result) + + channel = self._upgrade_room(self.other_token, expire_cache=False) + self.assertEqual(400, channel.code, channel.result) + + def test_first_upgrade_does_not_block_second(self) -> None: + """A second room upgrade is not blocked when a previous upgrade attempt was not + allowed. + """ + channel = self._upgrade_room(self.other_token) + self.assertEqual(403, channel.code, channel.result) + + channel = self._upgrade_room(expire_cache=False) + self.assertEqual(200, channel.code, channel.result) From cba1c5cbc293b2601d81b0cb9b1a28ec6f1e26a1 Mon Sep 17 00:00:00 2001 From: Shay Date: Wed, 15 Jun 2022 11:31:46 -0700 Subject: [PATCH 004/178] Add headers to individual options in config documentation to allow for linking. (#13055) --- changelog.d/13055.misc | 1 + .../configuration/config_documentation.md | 326 +++++++++--------- 2 files changed, 164 insertions(+), 163 deletions(-) create mode 100644 changelog.d/13055.misc diff --git a/changelog.d/13055.misc b/changelog.d/13055.misc new file mode 100644 index 0000000000..92a02a6080 --- /dev/null +++ b/changelog.d/13055.misc @@ -0,0 +1 @@ +Add headers to individual options in config documentation to allow for linking. diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 4e68801938..7c9860c3e1 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -67,7 +67,7 @@ apply if you want your config file to be read properly. 
A few helpful things to enabled: false ``` In this manual, all top-level settings (ones with no indentation) are identified - at the beginning of their section (i.e. "Config option: `example_setting`") and + at the beginning of their section (i.e. "### `example_setting`") and the sub-options, if any, are identified and listed in the body of the section. In addition, each setting has an example of its usage, with the proper indentation shown. @@ -124,7 +124,7 @@ documentation on how to configure or create custom modules for Synapse. --- -Config option: `modules` +### `modules` Use the `module` sub-option to add modules under this option to extend functionality. The `module` setting then has a sub-option, `config`, which can be used to define some configuration @@ -147,7 +147,7 @@ modules: Define your homeserver name and other base options. --- -Config option: `server_name` +### `server_name` This sets the public-facing domain of the server. @@ -177,7 +177,7 @@ Example configuration #2: server_name: localhost:8080 ``` --- -Config option: `pid_file` +### `pid_file` When running Synapse as a daemon, the file to store the pid in. Defaults to none. @@ -186,7 +186,7 @@ Example configuration: pid_file: DATADIR/homeserver.pid ``` --- -Config option: `web_client_location` +### `web_client_location` The absolute URL to the web client which `/` will redirect to. Defaults to none. @@ -195,7 +195,7 @@ Example configuration: web_client_location: https://riot.example.com/ ``` --- -Config option: `public_baseurl` +### `public_baseurl` The public-facing base URL that clients use to access this Homeserver (not including _matrix/...). This is the same URL a user might enter into the @@ -211,7 +211,7 @@ Example configuration: public_baseurl: https://example.com/ ``` --- -Config option: `serve_server_wellknown` +### `serve_server_wellknown` By default, other servers will try to reach our server on port 8448, which can be inconvenient in some environments. @@ -230,7 +230,7 @@ Example configuration: serve_server_wellknown: true ``` --- -Config option: `soft_file_limit` +### `soft_file_limit` Set the soft limit on the number of file descriptors synapse can use. Zero is used to indicate synapse should set the soft limit to the hard limit. @@ -241,7 +241,7 @@ Example configuration: soft_file_limit: 3 ``` --- -Config option: `presence` +### `presence` Presence tracking allows users to see the state (e.g online/offline) of other local and remote users. Set the `enabled` sub-option to false to @@ -254,7 +254,7 @@ presence: enabled: false ``` --- -Config option: `require_auth_for_profile_requests` +### `require_auth_for_profile_requests` Whether to require authentication to retrieve profile data (avatars, display names) of other users through the client API. Defaults to false. Note that profile data is also available @@ -265,7 +265,7 @@ Example configuration: require_auth_for_profile_requests: true ``` --- -Config option: `limit_profile_requests_to_users_who_share_rooms` +### `limit_profile_requests_to_users_who_share_rooms` Use this option to require a user to share a room with another user in order to retrieve their profile information. Only checked on Client-Server @@ -277,7 +277,7 @@ Example configuration: limit_profile_requests_to_users_who_share_rooms: true ``` --- -Config option: `include_profile_data_on_invite` +### `include_profile_data_on_invite` Use this option to prevent a user's profile data from being retrieved and displayed in a room until they have joined it. 
By default, a user's @@ -290,7 +290,7 @@ Example configuration: include_profile_data_on_invite: false ``` --- -Config option: `allow_public_rooms_without_auth` +### `allow_public_rooms_without_auth` If set to true, removes the need for authentication to access the server's public rooms directory through the client API, meaning that anyone can @@ -301,7 +301,7 @@ Example configuration: allow_public_rooms_without_auth: true ``` --- -Config option: `allow_public_rooms_without_auth` +### `allow_public_rooms_without_auth` If set to true, allows any other homeserver to fetch the server's public rooms directory via federation. Defaults to false. @@ -311,7 +311,7 @@ Example configuration: allow_public_rooms_over_federation: true ``` --- -Config option: `default_room_version` +### `default_room_version` The default room version for newly created rooms on this server. @@ -327,7 +327,7 @@ Example configuration: default_room_version: "8" ``` --- -Config option: `gc_thresholds` +### `gc_thresholds` The garbage collection threshold parameters to pass to `gc.set_threshold`, if defined. Defaults to none. @@ -337,7 +337,7 @@ Example configuration: gc_thresholds: [700, 10, 10] ``` --- -Config option: `gc_min_interval` +### `gc_min_interval` The minimum time in seconds between each GC for a generation, regardless of the GC thresholds. This ensures that we don't do GC too frequently. A value of `[1s, 10s, 30s]` @@ -350,7 +350,7 @@ Example configuration: gc_min_interval: [0.5s, 30s, 1m] ``` --- -Config option: `filter_timeline_limit` +### `filter_timeline_limit` Set the limit on the returned events in the timeline in the get and sync operations. Defaults to 100. A value of -1 means no upper limit. @@ -361,7 +361,7 @@ Example configuration: filter_timeline_limit: 5000 ``` --- -Config option: `block_non_admin_invites` +### `block_non_admin_invites` Whether room invites to users on this server should be blocked (except those sent by local server admins). Defaults to false. @@ -371,7 +371,7 @@ Example configuration: block_non_admin_invites: true ``` --- -Config option: `enable_search` +### `enable_search` If set to false, new messages will not be indexed for searching and users will receive errors when searching for messages. Defaults to true. @@ -381,7 +381,7 @@ Example configuration: enable_search: false ``` --- -Config option: `ip_range_blacklist` +### `ip_range_blacklist` This option prevents outgoing requests from being sent to the specified blacklisted IP address CIDR ranges. If this option is not specified then it defaults to private IP @@ -421,7 +421,7 @@ ip_range_blacklist: - 'fec0::/10' ``` --- -Config option: `ip_range_whitelist` +### `ip_range_whitelist` List of IP address CIDR ranges that should be allowed for federation, identity servers, push servers, and for checking key validity for @@ -438,7 +438,7 @@ ip_range_whitelist: - '192.168.1.1' ``` --- -Config option: `listeners` +### `listeners` List of ports that Synapse should listen on, their purpose and their configuration. @@ -539,7 +539,7 @@ listeners: type: manhole ``` --- -Config option: `manhole_settings` +### `manhole_settings` Connection settings for the manhole. You can find more information on the manhole [here](../../manhole.md). Manhole sub-options include: @@ -558,7 +558,7 @@ manhole_settings: ssh_pub_key_path: CONFDIR/id_rsa.pub ``` --- -Config option: `dummy_events_threshold` +### `dummy_events_threshold` Forward extremities can build up in a room due to networking delays between homeservers. 
Once this happens in a large room, calculation of the state of @@ -592,7 +592,7 @@ Useful options for Synapse admins. --- -Config option: `admin_contact` +### `admin_contact` How to reach the server admin, used in `ResourceLimitError`. Defaults to none. @@ -601,7 +601,7 @@ Example configuration: admin_contact: 'mailto:admin@server.com' ``` --- -Config option: `hs_disabled` and `hs_disabled_message` +### `hs_disabled` and `hs_disabled_message` Blocks users from connecting to the homeserver and provides a human-readable reason why the connection was blocked. Defaults to false. @@ -612,7 +612,7 @@ hs_disabled: true hs_disabled_message: 'Reason for why the HS is blocked' ``` --- -Config option: `limit_usage_by_mau` +### `limit_usage_by_mau` This option disables/enables monthly active user blocking. Used in cases where the admin or server owner wants to limit to the number of monthly active users. When enabled and a limit is @@ -624,7 +624,7 @@ Example configuration: limit_usage_by_mau: true ``` --- -Config option: `max_mau_value` +### `max_mau_value` This option sets the hard limit of monthly active users above which the server will start blocking user actions if `limit_usage_by_mau` is enabled. Defaults to 0. @@ -634,7 +634,7 @@ Example configuration: max_mau_value: 50 ``` --- -Config option: `mau_trial_days` +### `mau_trial_days` The option `mau_trial_days` is a means to add a grace period for active users. It means that users must be active for the specified number of days before they @@ -647,7 +647,7 @@ Example configuration: mau_trial_days: 5 ``` --- -Config option: `mau_appservice_trial_days` +### `mau_appservice_trial_days` The option `mau_appservice_trial_days` is similar to `mau_trial_days`, but applies a different trial number if the user was registered by an appservice. A value @@ -661,7 +661,7 @@ mau_appservice_trial_days: another_appservice_id: 6 ``` --- -Config option: `mau_limit_alerting` +### `mau_limit_alerting` The option `mau_limit_alerting` is a means of limiting client-side alerting should the mau limit be reached. This is useful for small instances @@ -674,7 +674,7 @@ Example configuration: mau_limit_alerting: false ``` --- -Config option: `mau_stats_only` +### `mau_stats_only` If enabled, the metrics for the number of monthly active users will be populated, however no one will be limited based on these numbers. If `limit_usage_by_mau` @@ -685,7 +685,7 @@ Example configuration: mau_stats_only: true ``` --- -Config option: `mau_limit_reserved_threepids` +### `mau_limit_reserved_threepids` Sometimes the server admin will want to ensure certain accounts are never blocked by mau checking. These accounts are specified by this option. @@ -699,7 +699,7 @@ mau_limit_reserved_threepids: address: 'reserved_user@example.com' ``` --- -Config option: `server_context` +### `server_context` This option is used by phonehome stats to group together related servers. Defaults to none. @@ -709,7 +709,7 @@ Example configuration: server_context: context ``` --- -Config option: `limit_remote_rooms` +### `limit_remote_rooms` When this option is enabled, the room "complexity" will be checked before a user joins a new remote room. If it is above the complexity limit, the server will @@ -733,7 +733,7 @@ limit_remote_rooms: admins_can_join: true ``` --- -Config option: `require_membership_for_aliases` +### `require_membership_for_aliases` Whether to require a user to be in the room to add an alias to it. Defaults to true. 
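As an illustration of how the monthly-active-user options described above work together, a sketch with placeholder values:

```yaml
limit_usage_by_mau: true
max_mau_value: 50
mau_trial_days: 2
mau_limit_reserved_threepids:
  - medium: email
    address: 'reserved_user@example.com'
```

With a configuration along these lines, activity beyond the first 50 monthly active users would be blocked, the reserved address would never be blocked, and new users would only start counting towards the limit after two days of activity.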
@@ -743,7 +743,7 @@ Example configuration: require_membership_for_aliases: false ``` --- -Config option: `allow_per_room_profiles` +### `allow_per_room_profiles` Whether to allow per-room membership profiles through the sending of membership events with profile information that differs from the target's global profile. @@ -754,7 +754,7 @@ Example configuration: allow_per_room_profiles: false ``` --- -Config option: `max_avatar_size` +### `max_avatar_size` The largest permissible file size in bytes for a user avatar. Defaults to no restriction. Use M for MB and K for KB. @@ -766,7 +766,7 @@ Example configuration: max_avatar_size: 10M ``` --- -Config option: `allowed_avatar_mimetypes` +### `allowed_avatar_mimetypes` The MIME types allowed for user avatars. Defaults to no restriction. @@ -778,7 +778,7 @@ Example configuration: allowed_avatar_mimetypes: ["image/png", "image/jpeg", "image/gif"] ``` --- -Config option: `redaction_retention_period` +### `redaction_retention_period` How long to keep redacted events in unredacted form in the database. After this period redacted events get replaced with their redacted form in the DB. @@ -790,7 +790,7 @@ Example configuration: redaction_retention_period: 28d ``` --- -Config option: `user_ips_max_age` +### `user_ips_max_age` How long to track users' last seen time and IPs in the database. @@ -801,7 +801,7 @@ Example configuration: user_ips_max_age: 14d ``` --- -Config option: `request_token_inhibit_3pid_errors` +### `request_token_inhibit_3pid_errors` Inhibits the `/requestToken` endpoints from returning an error that might leak information about whether an e-mail address is in use or not on this @@ -816,7 +816,7 @@ Example configuration: request_token_inhibit_3pid_errors: true ``` --- -Config option: `next_link_domain_whitelist` +### `next_link_domain_whitelist` A list of domains that the domain portion of `next_link` parameters must match. @@ -838,7 +838,7 @@ Example configuration: next_link_domain_whitelist: ["matrix.org"] ``` --- -Config option: `templates` and `custom_template_directory` +### `templates` and `custom_template_directory` These options define templates to use when generating email or HTML page contents. The `custom_template_directory` determines which directory Synapse will try to @@ -855,7 +855,7 @@ templates: custom_template_directory: /path/to/custom/templates/ ``` --- -Config option: `retention` +### `retention` This option and the associated options determine message retention policy at the server level. @@ -934,7 +934,7 @@ retention: Options related to TLS. --- -Config option: `tls_certificate_path` +### `tls_certificate_path` This option specifies a PEM-encoded X509 certificate for TLS. This certificate, as of Synapse 1.0, will need to be a valid and verifiable @@ -949,7 +949,7 @@ Example configuration: tls_certificate_path: "CONFDIR/SERVERNAME.tls.crt" ``` --- -Config option: `tls_private_key_path` +### `tls_private_key_path` PEM-encoded private key for TLS. Defaults to none. @@ -958,7 +958,7 @@ Example configuration: tls_private_key_path: "CONFDIR/SERVERNAME.tls.key" ``` --- -Config option: `federation_verify_certificates` +### `federation_verify_certificates` Whether to verify TLS server certificates for outbound federation requests. Defaults to true. To disable certificate verification, set the option to false. 
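To illustrate how the TLS options above combine, a minimal sketch (the paths are placeholders taken from the examples above):

```yaml
tls_certificate_path: "CONFDIR/SERVERNAME.tls.crt"
tls_private_key_path: "CONFDIR/SERVERNAME.tls.key"
federation_verify_certificates: true
```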
@@ -968,7 +968,7 @@ Example configuration: federation_verify_certificates: false ``` --- -Config option: `federation_client_minimum_tls_version` +### `federation_client_minimum_tls_version` The minimum TLS version that will be used for outbound federation requests. @@ -982,7 +982,7 @@ Example configuration: federation_client_minimum_tls_version: 1.2 ``` --- -Config option: `federation_certificate_verification_whitelist` +### `federation_certificate_verification_whitelist` Skip federation certificate verification on a given whitelist of domains. @@ -1001,7 +1001,7 @@ federation_certificate_verification_whitelist: - "*.onion" ``` --- -Config option: `federation_custom_ca_list` +### `federation_custom_ca_list` List of custom certificate authorities for federation traffic. @@ -1024,7 +1024,7 @@ federation_custom_ca_list: Options related to federation. --- -Config option: `federation_domain_whitelist` +### `federation_domain_whitelist` Restrict federation to the given whitelist of domains. N.B. we recommend also firewalling your federation listener to limit @@ -1040,7 +1040,7 @@ federation_domain_whitelist: - syd.example.com ``` --- -Config option: `federation_metrics_domains` +### `federation_metrics_domains` Report prometheus metrics on the age of PDUs being sent to and received from the given domains. This can be used to give an idea of "delay" on inbound @@ -1056,7 +1056,7 @@ federation_metrics_domains: - example.com ``` --- -Config option: `allow_profile_lookup_over_federation` +### `allow_profile_lookup_over_federation` Set to false to disable profile lookup over federation. By default, the Federation API allows other homeservers to obtain profile data of any user @@ -1067,7 +1067,7 @@ Example configuration: allow_profile_lookup_over_federation: false ``` --- -Config option: `allow_device_name_lookup_over_federation` +### `allow_device_name_lookup_over_federation` Set this option to true to allow device display name lookup over federation. By default, the Federation API prevents other homeservers from obtaining the display names of any user devices @@ -1083,7 +1083,7 @@ allow_device_name_lookup_over_federation: true Options related to caching --- -Config option: `event_cache_size` +### `event_cache_size` The number of events to cache in memory. Not affected by `caches.global_factor`. Defaults to 10K. @@ -1093,7 +1093,7 @@ Example configuration: event_cache_size: 15K ``` --- -Config option: `cache` and associated values +### `cache` and associated values A cache 'factor' is a multiplier that can be applied to each of Synapse's caches in order to increase or decrease the maximum @@ -1190,7 +1190,7 @@ file in Synapse's `contrib` directory, you can send a `SIGHUP` signal by using Config options related to database settings. --- -Config option: `database` +### `database` The `database` setting defines the database that synapse uses to store all of its data. @@ -1245,7 +1245,7 @@ database: Config options related to logging. --- -Config option: `log_config` +### `log_config` This option specifies a yaml python logging config file as described [here](https://docs.python.org/3.7/library/logging.config.html#configuration-dictionary-schema). @@ -1261,7 +1261,7 @@ Each ratelimiting configuration is made of two parameters: - `per_second`: number of requests a client can send per second. - `burst_count`: number of requests a client can send before being throttled. --- -Config option: `rc_message` +### `rc_message` Ratelimiting settings for client messaging. 
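To make the two ratelimiting parameters concrete, an illustrative sketch: with the values below a client could send a burst of up to 10 messages, after which it would be throttled to roughly one message every five seconds (0.2 per second).

```yaml
rc_message:
  per_second: 0.2
  burst_count: 10
```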
@@ -1276,7 +1276,7 @@ rc_message: burst_count: 15 ``` --- -Config option: `rc_registration` +### `rc_registration` This option ratelimits registration requests based on the client's IP address. It defaults to `per_second: 0.17`, `burst_count: 3`. @@ -1288,7 +1288,7 @@ rc_registration: burst_count: 2 ``` --- -Config option: `rc_registration_token_validity` +### `rc_registration_token_validity` This option checks the validity of registration tokens that ratelimits requests based on the client's IP address. @@ -1301,7 +1301,7 @@ rc_registration_token_validity: burst_count: 6 ``` --- -Config option: `rc_login` +### `rc_login` This option specifies several limits for login: * `address` ratelimits login requests based on the client's IP @@ -1329,7 +1329,7 @@ rc_login: burst_count: 7 ``` --- -Config option: `rc_admin_redaction` +### `rc_admin_redaction` This option sets ratelimiting redactions by room admins. If this is not explicitly set then it uses the same ratelimiting as per `rc_message`. This is useful @@ -1342,7 +1342,7 @@ rc_admin_redaction: burst_count: 50 ``` --- -Config option: `rc_joins` +### `rc_joins` This option allows for ratelimiting number of rooms a user can join. This setting has the following sub-options: @@ -1364,7 +1364,7 @@ rc_joins: burst_count: 12 ``` --- -Config option: `rc_3pid_validation` +### `rc_3pid_validation` This option ratelimits how often a user or IP can attempt to validate a 3PID. Defaults to `per_second: 0.003`, `burst_count: 5`. @@ -1376,7 +1376,7 @@ rc_3pid_validation: burst_count: 5 ``` --- -Config option: `rc_invites` +### `rc_invites` This option sets ratelimiting how often invites can be sent in a room or to a specific user. `per_room` defaults to `per_second: 0.3`, `burst_count: 10` and @@ -1407,7 +1407,7 @@ rc_invites: burst_count: 3 ``` --- -Config option: `rc_third_party_invite` +### `rc_third_party_invite` This option ratelimits 3PID invites (i.e. invites sent to a third-party ID such as an email address or a phone number) based on the account that's @@ -1420,7 +1420,7 @@ rc_third_party_invite: burst_count: 10 ``` --- -Config option: `rc_federation` +### `rc_federation` Defines limits on federation requests. @@ -1445,7 +1445,7 @@ rc_federation: concurrent: 5 ``` --- -Config option: `federation_rr_transactions_per_room_per_second` +### `federation_rr_transactions_per_room_per_second` Sets outgoing federation transaction frequency for sending read-receipts, per-room. @@ -1462,7 +1462,7 @@ federation_rr_transactions_per_room_per_second: 40 Config options related to Synapse's media store. --- -Config option: `enable_media_repo` +### `enable_media_repo` Enable the media store service in the Synapse master. Defaults to true. Set to false if you are using a separate media store worker. @@ -1472,7 +1472,7 @@ Example configuration: enable_media_repo: false ``` --- -Config option: `media_store_path` +### `media_store_path` Directory where uploaded images and attachments are stored. @@ -1481,7 +1481,7 @@ Example configuration: media_store_path: "DATADIR/media_store" ``` --- -Config option: `media_storage_providers` +### `media_storage_providers` Media storage providers allow media to be stored in different locations. Defaults to none. Associated sub-options are: @@ -1502,7 +1502,7 @@ media_storage_providers: directory: /mnt/some/other/directory ``` --- -Config option: `max_upload_size` +### `max_upload_size` The largest allowed upload size in bytes. 
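For illustration, the media options described above might be combined as follows (the path and size are placeholders):

```yaml
enable_media_repo: true
media_store_path: "DATADIR/media_store"
max_upload_size: 50M
```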
@@ -1515,7 +1515,7 @@ Example configuration: max_upload_size: 60M ``` --- -Config option: `max_image_pixels` +### `max_image_pixels` Maximum number of pixels that will be thumbnailed. Defaults to 32M. @@ -1524,7 +1524,7 @@ Example configuration: max_image_pixels: 35M ``` --- -Config option: `dynamic_thumbnails` +### `dynamic_thumbnails` Whether to generate new thumbnails on the fly to precisely match the resolution requested by the client. If true then whenever @@ -1537,7 +1537,7 @@ Example configuration: dynamic_thumbnails: true ``` --- -Config option: `thumbnail_sizes` +### `thumbnail_sizes` List of thumbnails to precalculate when an image is uploaded. Associated sub-options are: * `width` @@ -1564,7 +1564,7 @@ thumbnail_sizes: method: scale ``` --- -Config option: `media_retention` +### `media_retention` Controls whether local media and entries in the remote media cache (media that is downloaded from other homeservers) should be removed @@ -1596,7 +1596,7 @@ media_retention: remote_media_lifetime: 14d ``` --- -Config option: `url_preview_enabled` +### `url_preview_enabled` This setting determines whether the preview URL API is enabled. It is disabled by default. Set to true to enable. If enabled you must specify a @@ -1607,7 +1607,7 @@ Example configuration: url_preview_enabled: true ``` --- -Config option: `url_preview_ip_range_blacklist` +### `url_preview_ip_range_blacklist` List of IP address CIDR ranges that the URL preview spider is denied from accessing. There are no defaults: you must explicitly @@ -1649,7 +1649,7 @@ url_preview_ip_range_blacklist: - 'fec0::/10' ``` ---- -Config option: `url_preview_ip_range_whitelist` +### `url_preview_ip_range_whitelist` This option sets a list of IP address CIDR ranges that the URL preview spider is allowed to access even if they are specified in `url_preview_ip_range_blacklist`. @@ -1663,7 +1663,7 @@ url_preview_ip_range_whitelist: - '192.168.1.1' ``` --- -Config option: `url_preview_url_blacklist` +### `url_preview_url_blacklist` Optional list of URL matches that the URL preview spider is denied from accessing. You should use `url_preview_ip_range_blacklist` @@ -1709,7 +1709,7 @@ url_preview_url_blacklist: - netloc: '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' ``` --- -Config option: `max_spider_size` +### `max_spider_size` The largest allowed URL preview spidering size in bytes. Defaults to 10M. @@ -1718,7 +1718,7 @@ Example configuration: max_spider_size: 8M ``` --- -Config option: `url_preview_language` +### `url_preview_language` A list of values for the Accept-Language HTTP header used when downloading webpages during URL preview generation. This allows @@ -1743,7 +1743,7 @@ Example configuration: - '*;q=0.7' ``` ---- -Config option: `oembed` +### `oembed` oEmbed allows for easier embedding content from a website. It can be used for generating URLs previews of services which support it. A default list of oEmbed providers @@ -1764,7 +1764,7 @@ oembed: See [here](../../CAPTCHA_SETUP.md) for full details on setting up captcha. --- -Config option: `recaptcha_public_key` +### `recaptcha_public_key` This homeserver's ReCAPTCHA public key. Must be specified if `enable_registration_captcha` is enabled. @@ -1774,7 +1774,7 @@ Example configuration: recaptcha_public_key: "YOUR_PUBLIC_KEY" ``` --- -Config option: `recaptcha_private_key` +### `recaptcha_private_key` This homeserver's ReCAPTCHA private key. Must be specified if `enable_registration_captcha` is enabled. 
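The two ReCAPTCHA keys only take effect when captcha-protected registration is turned on; an illustrative sketch (the keys are placeholders):

```yaml
enable_registration_captcha: true
recaptcha_public_key: "YOUR_PUBLIC_KEY"
recaptcha_private_key: "YOUR_PRIVATE_KEY"
```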
@@ -1784,7 +1784,7 @@ Example configuration: recaptcha_private_key: "YOUR_PRIVATE_KEY" ``` --- -Config option: `enable_registration_captcha` +### `enable_registration_captcha` Set to true to enable ReCaptcha checks when registering, preventing signup unless a captcha is answered. Requires a valid ReCaptcha public/private key. @@ -1795,7 +1795,7 @@ Example configuration: enable_registration_captcha: true ``` --- -Config option: `recaptcha_siteverify_api` +### `recaptcha_siteverify_api` The API endpoint to use for verifying `m.login.recaptcha` responses. Defaults to `https://www.recaptcha.net/recaptcha/api/siteverify`. @@ -1809,7 +1809,7 @@ recaptcha_siteverify_api: "https://my.recaptcha.site" Options related to adding a TURN server to Synapse. --- -Config option: `turn_uris` +### `turn_uris` The public URIs of the TURN server to give to clients. @@ -1818,7 +1818,7 @@ Example configuration: turn_uris: [turn:example.org] ``` --- -Config option: `turn_shared_secret` +### `turn_shared_secret` The shared secret used to compute passwords for the TURN server. @@ -1837,7 +1837,7 @@ turn_username: "TURNSERVER_USERNAME" turn_password: "TURNSERVER_PASSWORD" ``` --- -Config option: `turn_user_lifetime` +### `turn_user_lifetime` How long generated TURN credentials last. Defaults to 1h. @@ -1846,7 +1846,7 @@ Example configuration: turn_user_lifetime: 2h ``` --- -Config option: `turn_allow_guests` +### `turn_allow_guests` Whether guests should be allowed to use the TURN server. This defaults to true, otherwise VoIP will be unreliable for guests. However, it does introduce a slight security risk as @@ -1862,7 +1862,7 @@ turn_allow_guests: false Registration can be rate-limited using the parameters in the [Ratelimiting](#ratelimiting) section of this manual. --- -Config option: `enable_registration` +### `enable_registration` Enable registration for new users. Defaults to false. It is highly recommended that if you enable registration, you use either captcha, email, or token-based verification to verify that new users are not bots. In order to enable registration @@ -1873,7 +1873,7 @@ Example configuration: enable_registration: true ``` --- -Config option: `enable_registration_without_verification` +### `enable_registration_without_verification` Enable registration without email or captcha verification. Note: this option is *not* recommended, as registration without verification is a known vector for spam and abuse. Defaults to false. Has no effect unless `enable_registration` is also enabled. @@ -1883,7 +1883,7 @@ Example configuration: enable_registration_without_verification: true ``` --- -Config option: `session_lifetime` +### `session_lifetime` Time that a user's session remains valid for, after they log in. @@ -1899,7 +1899,7 @@ Example configuration: session_lifetime: 24h ``` ---- -Config option: `refresh_access_token_lifetime` +### `refresh_access_token_lifetime` Time that an access token remains valid for, if the session is using refresh tokens. @@ -1917,7 +1917,7 @@ Example configuration: refreshable_access_token_lifetime: 10m ``` --- -Config option: `refresh_token_lifetime: 24h` +### `refresh_token_lifetime: 24h` Time that a refresh token remains valid for (provided that it is not exchanged for another one first). @@ -1934,7 +1934,7 @@ Example configuration: refresh_token_lifetime: 24h ``` --- -Config option: `nonrefreshable_access_token_lifetime` +### `nonrefreshable_access_token_lifetime` Time that an access token remains valid for, if the session is NOT using refresh tokens. 
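As an illustration of how the session and token lifetime options above relate to one another, a sketch with placeholder durations:

```yaml
session_lifetime: 24h
refreshable_access_token_lifetime: 10m
refresh_token_lifetime: 24h
nonrefreshable_access_token_lifetime: 24h
```

Roughly: an access token issued via a refresh token stays valid for ten minutes, an unused refresh token can be exchanged for up to 24 hours, and the login session as a whole expires after 24 hours regardless.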
@@ -1953,7 +1953,7 @@ Example configuration: nonrefreshable_access_token_lifetime: 24h ``` --- -Config option: `registrations_require_3pid` +### `registrations_require_3pid` If this is set, the user must provide all of the specified types of 3PID when registering. @@ -1964,7 +1964,7 @@ registrations_require_3pid: - msisdn ``` --- -Config option: `disable_msisdn_registration` +### `disable_msisdn_registration` Explicitly disable asking for MSISDNs from the registration flow (overrides `registrations_require_3pid` if MSISDNs are set as required). @@ -1974,7 +1974,7 @@ Example configuration: disable_msisdn_registration: true ``` --- -Config option: `allowed_local_3pids` +### `allowed_local_3pids` Mandate that users are only allowed to associate certain formats of 3PIDs with accounts on this server, as specified by the `medium` and `pattern` sub-options. @@ -1990,7 +1990,7 @@ allowed_local_3pids: pattern: '\+44' ``` --- -Config option: `enable_3pid_lookup` +### `enable_3pid_lookup` Enable 3PIDs lookup requests to identity servers from this server. Defaults to true. @@ -1999,7 +1999,7 @@ Example configuration: enable_3pid_lookup: false ``` --- -Config option: `registration_requires_token` +### `registration_requires_token` Require users to submit a token during registration. Tokens can be managed using the admin [API](../administration/admin_api/registration_tokens.md). @@ -2012,7 +2012,7 @@ Example configuration: registration_requires_token: true ``` --- -Config option: `registration_shared_secret` +### `registration_shared_secret` If set, allows registration of standard or admin accounts by anyone who has the shared secret, even if registration is otherwise disabled. @@ -2022,7 +2022,7 @@ Example configuration: registration_shared_secret: ``` --- -Config option: `bcrypt_rounds` +### `bcrypt_rounds` Set the number of bcrypt rounds used to generate password hash. Larger numbers increase the work factor needed to generate the hash. @@ -2034,7 +2034,7 @@ Example configuration: bcrypt_rounds: 14 ``` --- -Config option: `allow_guest_access` +### `allow_guest_access` Allows users to register as guests without a password/email/etc, and participate in rooms hosted on this server which have been made @@ -2045,7 +2045,7 @@ Example configuration: allow_guest_access: true ``` --- -Config option: `default_identity_server` +### `default_identity_server` The identity server which we suggest that clients should use when users log in on this server. @@ -2058,7 +2058,7 @@ Example configuration: default_identity_server: https://matrix.org ``` --- -Config option: `account_threepid_delegates` +### `account_threepid_delegates` Handle threepid (email/phone etc) registration and password resets through a set of *trusted* identity servers. Note that this allows the configured identity server to @@ -2087,7 +2087,7 @@ account_threepid_delegates: msisdn: http://localhost:8090 # Delegate SMS sending to this local process ``` --- -Config option: `enable_set_displayname` +### `enable_set_displayname` Whether users are allowed to change their displayname after it has been initially set. Useful when provisioning users based on the @@ -2100,7 +2100,7 @@ Example configuration: enable_set_displayname: false ``` --- -Config option: `enable_set_avatar_url` +### `enable_set_avatar_url` Whether users are allowed to change their avatar after it has been initially set. 
Useful when provisioning users based on the contents @@ -2113,7 +2113,7 @@ Example configuration: enable_set_avatar_url: false ``` --- -Config option: `enable_3pid_changes` +### `enable_3pid_changes` Whether users can change the third-party IDs associated with their accounts (email address and msisdn). @@ -2125,7 +2125,7 @@ Example configuration: enable_3pid_changes: false ``` --- -Config option: `auto_join_rooms` +### `auto_join_rooms` Users who register on this homeserver will automatically be joined to the rooms listed under this option. @@ -2143,7 +2143,7 @@ auto_join_rooms: - "#anotherexampleroom:example.com" ``` --- -Config option: `autocreate_auto_join_rooms` +### `autocreate_auto_join_rooms` Where `auto_join_rooms` are specified, setting this flag ensures that the rooms exist by creating them when the first user on the @@ -2163,7 +2163,7 @@ Example configuration: autocreate_auto_join_rooms: false ``` --- -Config option: `autocreate_auto_join_rooms_federated` +### `autocreate_auto_join_rooms_federated` Whether the rooms listen in `auto_join_rooms` that are auto-created are available via federation. Only has an effect if `autocreate_auto_join_rooms` is true. @@ -2180,7 +2180,7 @@ Example configuration: autocreate_auto_join_rooms_federated: false ``` --- -Config option: `autocreate_auto_join_room_preset` +### `autocreate_auto_join_room_preset` The room preset to use when auto-creating one of `auto_join_rooms`. Only has an effect if `autocreate_auto_join_rooms` is true. @@ -2202,7 +2202,7 @@ Example configuration: autocreate_auto_join_room_preset: private_chat ``` --- -Config option: `auto_join_mxid_localpart` +### `auto_join_mxid_localpart` The local part of the user id which is used to create `auto_join_rooms` if `autocreate_auto_join_rooms` is true. If this is not provided then the @@ -2226,7 +2226,7 @@ Example configuration: auto_join_mxid_localpart: system ``` --- -Config option: `auto_join_rooms_for_guests` +### `auto_join_rooms_for_guests` When `auto_join_rooms` is specified, setting this flag to false prevents guest accounts from being automatically joined to the rooms. @@ -2238,7 +2238,7 @@ Example configuration: auto_join_rooms_for_guests: false ``` --- -Config option: `inhibit_user_in_use_error` +### `inhibit_user_in_use_error` Whether to inhibit errors raised when registering a new account if the user ID already exists. If turned on, requests to `/register/available` will always @@ -2257,7 +2257,7 @@ inhibit_user_in_use_error: true Config options related to metrics. --- -Config option: `enable_metrics` +### `enable_metrics` Set to true to enable collection and rendering of performance metrics. Defaults to false. @@ -2267,7 +2267,7 @@ Example configuration: enable_metrics: true ``` --- -Config option: `sentry` +### `sentry` Use this option to enable sentry integration. Provide the DSN assigned to you by sentry with the `dsn` setting. @@ -2284,7 +2284,7 @@ sentry: dsn: "..." ``` --- -Config option: `metrics_flags` +### `metrics_flags` Flags to enable Prometheus metrics which are not suitable to be enabled by default, either for performance reasons or limited use. @@ -2299,7 +2299,7 @@ metrics_flags: known_servers: true ``` --- -Config option: `report_stats` +### `report_stats` Whether or not to report anonymized homeserver usage statistics. This is originally set when generating the config. 
Set this option to true or false to change the current @@ -2310,7 +2310,7 @@ Example configuration: report_stats: true ``` --- -Config option: `report_stats_endpoint` +### `report_stats_endpoint` The endpoint to report the anonymized homeserver usage statistics to. Defaults to https://matrix.org/report-usage-stats/push @@ -2324,7 +2324,7 @@ report_stats_endpoint: https://example.com/report-usage-stats/push Config settings related to the client/server API --- -Config option: `room_prejoin_state:` +### `room_prejoin_state:` Controls for the state that is shared with users who receive an invite to a room. By default, the following state event types are shared with users who @@ -2353,7 +2353,7 @@ room_prejoin_state: - m.room.join_rules ``` --- -Config option: `track_puppeted_user_ips` +### `track_puppeted_user_ips` We record the IP address of clients used to access the API for various reasons, including displaying it to the user in the "Where you're signed in" @@ -2373,7 +2373,7 @@ Example configuration: track_puppeted_user_ips: true ``` --- -Config option: `app_service_config_files` +### `app_service_config_files` A list of application service config files to use. @@ -2384,7 +2384,7 @@ app_service_config_files: - app_service_2.yaml ``` --- -Config option: `track_appservice_user_ips` +### `track_appservice_user_ips` Defaults to false. Set to true to enable tracking of application service IP addresses. Implicitly enables MAU tracking for application service users. @@ -2394,7 +2394,7 @@ Example configuration: track_appservice_user_ips: true ``` --- -Config option: `macaroon_secret_key` +### `macaroon_secret_key` A secret which is used to sign access tokens. If none is specified, the `registration_shared_secret` is used, if one is given; otherwise, @@ -2405,7 +2405,7 @@ Example configuration: macaroon_secret_key: ``` --- -Config option: `form_secret` +### `form_secret` A secret which is used to calculate HMACs for form values, to stop falsification of values. Must be specified for the User Consent @@ -2420,7 +2420,7 @@ form_secret: Config options relating to signing keys --- -Config option: `signing_key_path` +### `signing_key_path` Path to the signing key to sign messages with. @@ -2429,7 +2429,7 @@ Example configuration: signing_key_path: "CONFDIR/SERVERNAME.signing.key" ``` --- -Config option: `old_signing_keys` +### `old_signing_keys` The keys that the server used to sign messages with but won't use to sign new messages. For each key, `key` should be the base64-encoded public key, and @@ -2445,7 +2445,7 @@ old_signing_keys: "ed25519:id": { key: "base64string", expired_ts: 123456789123 } ``` --- -Config option: `key_refresh_interval` +### `key_refresh_interval` How long key response published by this server is valid for. Used to set the `valid_until_ts` in `/key/v2` APIs. @@ -2457,7 +2457,7 @@ Example configuration: key_refresh_interval: 2d ``` --- -Config option: `trusted_key_servers:` +### `trusted_key_servers:` The trusted servers to download signing keys from. @@ -2500,7 +2500,7 @@ trusted_key_servers: - server_name: "matrix.org" ``` --- -Config option: `suppress_key_server_warning` +### `suppress_key_server_warning` Set the following to true to disable the warning that is emitted when the `trusted_key_servers` include 'matrix.org'. See above. @@ -2510,7 +2510,7 @@ Example configuration: suppress_key_server_warning: true ``` --- -Config option: `key_server_signing_keys_path` +### `key_server_signing_keys_path` The signing keys to use when acting as a trusted key server. 
If not specified defaults to the server signing key. @@ -2536,7 +2536,7 @@ You will also want to investigate the settings under the "sso" configuration section below. --- -Config option: `saml2_config` +### `saml2_config` Enable SAML2 for registration and login. Uses pysaml2. To learn more about pysaml and to find a full list options for configuring pysaml, read the docs [here](https://pysaml2.readthedocs.io/en/latest/). @@ -2673,7 +2673,7 @@ saml2_config: idp_entityid: 'https://our_idp/entityid' ``` --- -Config option: `oidc_providers` +### `oidc_providers` List of OpenID Connect (OIDC) / OAuth 2.0 identity providers, for registration and login. See [here](../../openid.md) @@ -2861,7 +2861,7 @@ oidc_providers: value: "synapseUsers" ``` --- -Config option: `cas_config` +### `cas_config` Enable Central Authentication Service (CAS) for registration and login. Has the following sub-options: @@ -2887,7 +2887,7 @@ cas_config: department: None ``` --- -Config option: `sso` +### `sso` Additional settings to use with single-sign on systems such as OpenID Connect, SAML2 and CAS. @@ -2924,7 +2924,7 @@ sso: update_profile_information: true ``` --- -Config option: `jwt_config` +### `jwt_config` JSON web token integration. The following settings can be used to make Synapse JSON web tokens for authentication, instead of its internal @@ -2971,7 +2971,7 @@ jwt_config: - "provided-by-your-issuer" ``` --- -Config option: `password_config` +### `password_config` Use this setting to enable password-based logins. @@ -3015,7 +3015,7 @@ password_config: require_uppercase: true ``` --- -Config option: `ui_auth` +### `ui_auth` The amount of time to allow a user-interactive authentication session to be active. @@ -3037,7 +3037,7 @@ ui_auth: session_timeout: "15s" ``` --- -Config option: `email` +### `email` Configuration for sending emails from Synapse. @@ -3140,7 +3140,7 @@ email: Configuration settings related to push notifications --- -Config option: `push` +### `push` This setting defines options for push notifications. @@ -3173,7 +3173,7 @@ push: Config options relating to rooms. --- -Config option: `encryption_enabled_by_default` +### `encryption_enabled_by_default` Controls whether locally-created rooms should be end-to-end encrypted by default. @@ -3195,7 +3195,7 @@ Example configuration: encryption_enabled_by_default_for_room_type: invite ``` --- -Config option: `user_directory` +### `user_directory` This setting defines options related to the user directory. @@ -3226,7 +3226,7 @@ user_directory: prefer_local_users: true ``` --- -Config option: `user_consent` +### `user_consent` For detailed instructions on user consent configuration, see [here](../../consent_tracking.md). @@ -3277,7 +3277,7 @@ user_consent: policy_name: Privacy Policy ``` --- -Config option: `stats` +### `stats` Settings for local room and user statistics collection. See [here](../../room_and_user_statistics.md) for more. @@ -3292,7 +3292,7 @@ stats: enabled: false ``` --- -Config option: `server_notices` +### `server_notices` Use this setting to enable a room which can be used to send notices from the server to users. It is a special room which users cannot leave; notices @@ -3316,7 +3316,7 @@ server_notices: room_name: "Server Notices" ``` --- -Config option: `enable_room_list_search` +### `enable_room_list_search` Set to false to disable searching the public room list. 
When disabled blocks searching local and remote room lists for local and remote @@ -3327,7 +3327,7 @@ Example configuration: enable_room_list_search: false ``` --- -Config option: `alias_creation` +### `alias_creation` The `alias_creation` option controls who is allowed to create aliases on this server. @@ -3388,7 +3388,7 @@ room_list_publication_rules: ``` --- -Config option: `default_power_level_content_override` +### `default_power_level_content_override` The `default_power_level_content_override` option controls the default power levels for rooms. @@ -3417,7 +3417,7 @@ default_power_level_content_override: Configuration options related to Opentracing support. --- -Config option: `opentracing` +### `opentracing` These settings enable and configure opentracing, which implements distributed tracing. This allows you to observe the causal chains of events across servers @@ -3460,7 +3460,7 @@ opentracing: Configuration options related to workers. --- -Config option: `send_federation` +### `send_federation` Controls sending of outbound federation transactions on the main process. Set to false if using a federation sender worker. Defaults to true. @@ -3470,7 +3470,7 @@ Example configuration: send_federation: false ``` --- -Config option: `federation_sender_instances` +### `federation_sender_instances` It is possible to run multiple federation sender workers, in which case the work is balanced across them. Use this setting to list the senders. @@ -3486,7 +3486,7 @@ federation_sender_instances: - federation_sender1 ``` --- -Config option: `instance_map` +### `instance_map` When using workers this should be a map from worker name to the HTTP replication listener of the worker, if configured. @@ -3499,7 +3499,7 @@ instance_map: port: 8034 ``` --- -Config option: `stream_writers` +### `stream_writers` Experimental: When using workers you can define which workers should handle event persistence and typing notifications. Any worker @@ -3512,7 +3512,7 @@ stream_writers: typing: worker1 ``` --- -Config option: `run_background_tasks_on` +### `run_background_tasks_on` The worker that is used to run background tasks (e.g. cleaning up expired data). If not provided this defaults to the main process. @@ -3522,7 +3522,7 @@ Example configuration: run_background_tasks_on: worker1 ``` --- -Config option: `worker_replication_secret` +### `worker_replication_secret` A shared secret used by the replication APIs to authenticate HTTP requests from workers. @@ -3533,7 +3533,7 @@ Example configuration: ```yaml worker_replication_secret: "secret_secret" ``` -Config option: `redis` +### `redis` Configuration for Redis when using workers. This *must* be enabled when using workers (unless using old style direct TCP configuration). @@ -3555,7 +3555,7 @@ redis: Configuration settings related to background updates. --- -Config option: `background_updates` +### `background_updates` Background updates are database updates that are run in the background in batches. 
The duration, minimum batch size, default batch size, whether to sleep between batches and if so, how long to From 8ecf6be1e1a737a09f51137302ad0d9ae4ed519b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 15 Jun 2022 19:48:22 +0100 Subject: [PATCH 005/178] Move some event auth checks out to a different method (#13065) * Add auth events to events used in tests * Move some event auth checks out to a different method Some of the event auth checks apply to an event's auth_events, rather than the state at the event - which means they can play no part in state resolution. Move them out to a separate method. * Rename check_auth_rules_for_event Now it only checks the state-dependent auth rules, it needs a better name. --- changelog.d/13065.misc | 1 + synapse/event_auth.py | 108 ++++++++++++----- synapse/handlers/event_auth.py | 8 +- synapse/handlers/federation_event.py | 27 +++-- synapse/state/v1.py | 4 +- synapse/state/v2.py | 2 +- tests/test_event_auth.py | 167 ++++++++++++++++++--------- 7 files changed, 219 insertions(+), 98 deletions(-) create mode 100644 changelog.d/13065.misc diff --git a/changelog.d/13065.misc b/changelog.d/13065.misc new file mode 100644 index 0000000000..e9e8a7659a --- /dev/null +++ b/changelog.d/13065.misc @@ -0,0 +1 @@ +Avoid rechecking event auth rules which are independent of room state. diff --git a/synapse/event_auth.py b/synapse/event_auth.py index e23503c1e0..360a50cc71 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -15,11 +15,12 @@ import logging import typing -from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union +from typing import Any, Collection, Dict, Iterable, List, Optional, Set, Tuple, Union from canonicaljson import encode_canonical_json from signedjson.key import decode_verify_key_bytes from signedjson.sign import SignatureVerifyException, verify_signed_json +from typing_extensions import Protocol from unpaddedbase64 import decode_base64 from synapse.api.constants import ( @@ -35,7 +36,8 @@ from synapse.api.room_versions import ( EventFormatVersions, RoomVersion, ) -from synapse.types import StateMap, UserID, get_domain_from_id +from synapse.storage.databases.main.events_worker import EventRedactBehaviour +from synapse.types import MutableStateMap, StateMap, UserID, get_domain_from_id if typing.TYPE_CHECKING: # conditional imports to avoid import cycle @@ -45,6 +47,17 @@ if typing.TYPE_CHECKING: logger = logging.getLogger(__name__) +class _EventSourceStore(Protocol): + async def get_events( + self, + event_ids: Collection[str], + redact_behaviour: EventRedactBehaviour, + get_prev_content: bool = False, + allow_rejected: bool = False, + ) -> Dict[str, "EventBase"]: + ... 
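The `_EventSourceStore` protocol above exists so that the state-independent auth checks depend only on a narrow slice of the datastore. As a rough illustration (not Synapse code; the names below are made up), structural typing means any object with a compatible `get_events` coroutine satisfies such a protocol without inheriting from it — which is what later lets the test suite pass in a simple in-memory stub:

```python
# Minimal sketch of structural typing with Protocol; the classes here are
# illustrative stand-ins, not part of Synapse.
import asyncio
from typing import Collection, Dict

from typing_extensions import Protocol


class EventSource(Protocol):
    async def get_events(self, event_ids: Collection[str]) -> Dict[str, dict]:
        ...


class InMemoryEventSource:
    """Satisfies EventSource purely by having a matching method."""

    def __init__(self, events: Dict[str, dict]) -> None:
        self._events = events

    async def get_events(self, event_ids: Collection[str]) -> Dict[str, dict]:
        return {e: self._events[e] for e in event_ids if e in self._events}


async def load(store: EventSource) -> Dict[str, dict]:
    # A type checker accepts InMemoryEventSource here; no inheritance needed.
    return await store.get_events(["$event_a"])


print(asyncio.run(load(InMemoryEventSource({"$event_a": {"type": "m.room.create"}}))))
```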
+ + def validate_event_for_room_version(event: "EventBase") -> None: """Ensure that the event complies with the limits, and has the right signatures @@ -112,47 +125,52 @@ def validate_event_for_room_version(event: "EventBase") -> None: raise AuthError(403, "Event not signed by authorising server") -def check_auth_rules_for_event( +async def check_state_independent_auth_rules( + store: _EventSourceStore, event: "EventBase", - auth_events: Iterable["EventBase"], ) -> None: - """Check that an event complies with the auth rules + """Check that an event complies with auth rules that are independent of room state - Checks whether an event passes the auth rules with a given set of state events - - Assumes that we have already checked that the event is the right shape (it has - enough signatures, has a room ID, etc). In other words: - - - it's fine for use in state resolution, when we have already decided whether to - accept the event or not, and are now trying to decide whether it should make it - into the room state - - - when we're doing the initial event auth, it is only suitable in combination with - a bunch of other tests. + Runs through the first few auth rules, which are independent of room state. (Which + means that we only need to them once for each received event) Args: + store: the datastore; used to fetch the auth events for validation event: the event being checked. - auth_events: the room state to check the events against. Raises: AuthError if the checks fail """ - # We need to ensure that the auth events are actually for the same room, to - # stop people from using powers they've been granted in other rooms for - # example. - # - # Arguably we don't need to do this when we're just doing state res, as presumably - # the state res algorithm isn't silly enough to give us events from different rooms. - # Still, it's easier to do it anyway. + # Check the auth events. + auth_events = await store.get_events( + event.auth_event_ids(), + redact_behaviour=EventRedactBehaviour.as_is, + allow_rejected=True, + ) room_id = event.room_id - for auth_event in auth_events: + auth_dict: MutableStateMap[str] = {} + for auth_event_id in event.auth_event_ids(): + auth_event = auth_events.get(auth_event_id) + + # we should have all the auth events by now, so if we do not, that suggests + # a synapse programming error + if auth_event is None: + raise RuntimeError( + f"Event {event.event_id} has unknown auth event {auth_event_id}" + ) + + # We need to ensure that the auth events are actually for the same room, to + # stop people from using powers they've been granted in other rooms for + # example. if auth_event.room_id != room_id: raise AuthError( 403, "During auth for event %s in room %s, found event %s in the state " "which is in room %s" - % (event.event_id, room_id, auth_event.event_id, auth_event.room_id), + % (event.event_id, room_id, auth_event_id, auth_event.room_id), ) + + # We also need to check that the auth event itself is not rejected. if auth_event.rejected_reason: raise AuthError( 403, @@ -160,6 +178,8 @@ def check_auth_rules_for_event( % (event.event_id, auth_event.event_id), ) + auth_dict[(auth_event.type, auth_event.state_key)] = auth_event_id + # Implementation of https://matrix.org/docs/spec/rooms/v1#authorization-rules # # 1. If type is m.room.create: @@ -181,16 +201,46 @@ def check_auth_rules_for_event( "room appears to have unsupported version %s" % (room_version_prop,), ) - logger.debug("Allowing! 
%s", event) return - auth_dict = {(e.type, e.state_key): e for e in auth_events} - # 3. If event does not have a m.room.create in its auth_events, reject. creation_event = auth_dict.get((EventTypes.Create, ""), None) if not creation_event: raise AuthError(403, "No create event in auth events") + +def check_state_dependent_auth_rules( + event: "EventBase", + auth_events: Iterable["EventBase"], +) -> None: + """Check that an event complies with auth rules that depend on room state + + Runs through the parts of the auth rules that check an event against bits of room + state. + + Note: + + - it's fine for use in state resolution, when we have already decided whether to + accept the event or not, and are now trying to decide whether it should make it + into the room state + + - when we're doing the initial event auth, it is only suitable in combination with + a bunch of other tests (including, but not limited to, check_state_independent_auth_rules). + + Args: + event: the event being checked. + auth_events: the room state to check the events against. + + Raises: + AuthError if the checks fail + """ + # there are no state-dependent auth rules for create events. + if event.type == EventTypes.Create: + logger.debug("Allowing! %s", event) + return + + auth_dict = {(e.type, e.state_key): e for e in auth_events} + # additional check for m.federate creating_domain = get_domain_from_id(event.room_id) originating_domain = get_domain_from_id(event.sender) diff --git a/synapse/handlers/event_auth.py b/synapse/handlers/event_auth.py index ed4149bd58..a2dd9c7efa 100644 --- a/synapse/handlers/event_auth.py +++ b/synapse/handlers/event_auth.py @@ -23,7 +23,10 @@ from synapse.api.constants import ( ) from synapse.api.errors import AuthError, Codes, SynapseError from synapse.api.room_versions import RoomVersion -from synapse.event_auth import check_auth_rules_for_event +from synapse.event_auth import ( + check_state_dependent_auth_rules, + check_state_independent_auth_rules, +) from synapse.events import EventBase from synapse.events.builder import EventBuilder from synapse.events.snapshot import EventContext @@ -52,9 +55,10 @@ class EventAuthHandler: context: EventContext, ) -> None: """Check an event passes the auth rules at its own auth events""" + await check_state_independent_auth_rules(self._store, event) auth_event_ids = event.auth_event_ids() auth_events_by_id = await self._store.get_events(auth_event_ids) - check_auth_rules_for_event(event, auth_events_by_id.values()) + check_state_dependent_auth_rules(event, auth_events_by_id.values()) def compute_auth_events( self, diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 6c9e6a00b5..565ffd7cfd 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -50,7 +50,8 @@ from synapse.api.errors import ( from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion, RoomVersions from synapse.event_auth import ( auth_types_for_event, - check_auth_rules_for_event, + check_state_dependent_auth_rules, + check_state_independent_auth_rules, validate_event_for_room_version, ) from synapse.events import EventBase @@ -1430,7 +1431,9 @@ class FederationEventHandler: allow_rejected=True, ) - def prep(event: EventBase) -> Optional[Tuple[EventBase, EventContext]]: + events_and_contexts_to_persist: List[Tuple[EventBase, EventContext]] = [] + + async def prep(event: EventBase) -> None: with nested_logging_context(suffix=event.event_id): auth = [] for auth_event_id in 
event.auth_event_ids(): @@ -1444,7 +1447,7 @@ class FederationEventHandler: event, auth_event_id, ) - return None + return auth.append(ae) # we're not bothering about room state, so flag the event as an outlier. @@ -1453,17 +1456,20 @@ class FederationEventHandler: context = EventContext.for_outlier(self._storage_controllers) try: validate_event_for_room_version(event) - check_auth_rules_for_event(event, auth) + await check_state_independent_auth_rules(self._store, event) + check_state_dependent_auth_rules(event, auth) except AuthError as e: logger.warning("Rejecting %r because %s", event, e) context.rejected = RejectedReason.AUTH_ERROR - return event, context + events_and_contexts_to_persist.append((event, context)) + + for event in fetched_events: + await prep(event) - events_to_persist = (x for x in (prep(event) for event in fetched_events) if x) await self.persist_events_and_notify( room_id, - tuple(events_to_persist), + events_and_contexts_to_persist, # Mark these events backfilled as they're historic events that will # eventually be backfilled. For example, missing events we fetch # during backfill should be marked as backfilled as well. @@ -1515,7 +1521,8 @@ class FederationEventHandler: # ... and check that the event passes auth at those auth events. try: - check_auth_rules_for_event(event, claimed_auth_events) + await check_state_independent_auth_rules(self._store, event) + check_state_dependent_auth_rules(event, claimed_auth_events) except AuthError as e: logger.warning( "While checking auth of %r against auth_events: %s", event, e @@ -1563,7 +1570,7 @@ class FederationEventHandler: auth_events_for_auth = calculated_auth_event_map try: - check_auth_rules_for_event(event, auth_events_for_auth.values()) + check_state_dependent_auth_rules(event, auth_events_for_auth.values()) except AuthError as e: logger.warning("Failed auth resolution for %r because %s", event, e) context.rejected = RejectedReason.AUTH_ERROR @@ -1663,7 +1670,7 @@ class FederationEventHandler: ) try: - check_auth_rules_for_event(event, current_auth_events) + check_state_dependent_auth_rules(event, current_auth_events) except AuthError as e: logger.warning( "Soft-failing %r (from %s) because %s", diff --git a/synapse/state/v1.py b/synapse/state/v1.py index 8bbb4ce41c..500e384695 100644 --- a/synapse/state/v1.py +++ b/synapse/state/v1.py @@ -330,7 +330,7 @@ def _resolve_auth_events( auth_events[(prev_event.type, prev_event.state_key)] = prev_event try: # The signatures have already been checked at this point - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( event, auth_events.values(), ) @@ -347,7 +347,7 @@ def _resolve_normal_events( for event in _ordered_events(events): try: # The signatures have already been checked at this point - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( event, auth_events.values(), ) diff --git a/synapse/state/v2.py b/synapse/state/v2.py index 6a16f38a15..7db032203b 100644 --- a/synapse/state/v2.py +++ b/synapse/state/v2.py @@ -573,7 +573,7 @@ async def _iterative_auth_checks( auth_events[key] = event_map[ev_id] try: - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( event, auth_events.values(), ) diff --git a/tests/test_event_auth.py b/tests/test_event_auth.py index 229ecd84a6..e8e458cfd3 100644 --- a/tests/test_event_auth.py +++ b/tests/test_event_auth.py @@ -1,4 +1,4 @@ -# Copyright 2018 New Vector Ltd +# Copyright 2018-2022 The Matrix.org Foundation C.I.C. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ # limitations under the License. import unittest -from typing import Optional +from typing import Collection, Dict, Iterable, List, Optional from parameterized import parameterized @@ -22,8 +22,41 @@ from synapse.api.constants import EventContentFields from synapse.api.errors import AuthError from synapse.api.room_versions import EventFormatVersions, RoomVersion, RoomVersions from synapse.events import EventBase, make_event_from_dict +from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.types import JsonDict, get_domain_from_id +from tests.test_utils import get_awaitable_result + + +class _StubEventSourceStore: + """A stub implementation of the EventSourceStore""" + + def __init__(self): + self._store: Dict[str, EventBase] = {} + + def add_event(self, event: EventBase): + self._store[event.event_id] = event + + def add_events(self, events: Iterable[EventBase]): + for event in events: + self._store[event.event_id] = event + + async def get_events( + self, + event_ids: Collection[str], + redact_behaviour: EventRedactBehaviour, + get_prev_content: bool = False, + allow_rejected: bool = False, + ) -> Dict[str, EventBase]: + assert allow_rejected + assert not get_prev_content + assert redact_behaviour == EventRedactBehaviour.as_is + results = {} + for e in event_ids: + if e in self._store: + results[e] = self._store[e] + return results + class EventAuthTestCase(unittest.TestCase): def test_rejected_auth_events(self): @@ -36,11 +69,15 @@ class EventAuthTestCase(unittest.TestCase): _join_event(RoomVersions.V9, creator), ] + event_store = _StubEventSourceStore() + event_store.add_events(auth_events) + # creator should be able to send state - event_auth.check_auth_rules_for_event( - _random_state_event(RoomVersions.V9, creator), - auth_events, + event = _random_state_event(RoomVersions.V9, creator, auth_events) + get_awaitable_result( + event_auth.check_state_independent_auth_rules(event_store, event) ) + event_auth.check_state_dependent_auth_rules(event, auth_events) # ... but a rejected join_rules event should cause it to be rejected rejected_join_rules = _join_rules_event( @@ -50,23 +87,27 @@ class EventAuthTestCase(unittest.TestCase): ) rejected_join_rules.rejected_reason = "stinky" auth_events.append(rejected_join_rules) + event_store.add_event(rejected_join_rules) - self.assertRaises( - AuthError, - event_auth.check_auth_rules_for_event, - _random_state_event(RoomVersions.V9, creator), - auth_events, - ) + with self.assertRaises(AuthError): + get_awaitable_result( + event_auth.check_state_independent_auth_rules( + event_store, + _random_state_event(RoomVersions.V9, creator), + ) + ) # ... 
even if there is *also* a good join rules auth_events.append(_join_rules_event(RoomVersions.V9, creator, "public")) + event_store.add_event(rejected_join_rules) - self.assertRaises( - AuthError, - event_auth.check_auth_rules_for_event, - _random_state_event(RoomVersions.V9, creator), - auth_events, - ) + with self.assertRaises(AuthError): + get_awaitable_result( + event_auth.check_state_independent_auth_rules( + event_store, + _random_state_event(RoomVersions.V9, creator), + ) + ) def test_random_users_cannot_send_state_before_first_pl(self): """ @@ -82,7 +123,7 @@ class EventAuthTestCase(unittest.TestCase): ] # creator should be able to send state - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _random_state_event(RoomVersions.V1, creator), auth_events, ) @@ -90,7 +131,7 @@ class EventAuthTestCase(unittest.TestCase): # joiner should not be able to send state self.assertRaises( AuthError, - event_auth.check_auth_rules_for_event, + event_auth.check_state_dependent_auth_rules, _random_state_event(RoomVersions.V1, joiner), auth_events, ) @@ -119,13 +160,13 @@ class EventAuthTestCase(unittest.TestCase): # pleb should not be able to send state self.assertRaises( AuthError, - event_auth.check_auth_rules_for_event, + event_auth.check_state_dependent_auth_rules, _random_state_event(RoomVersions.V1, pleb), auth_events, ), # king should be able to send state - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _random_state_event(RoomVersions.V1, king), auth_events, ) @@ -140,27 +181,27 @@ class EventAuthTestCase(unittest.TestCase): ] # creator should be able to send aliases - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _alias_event(RoomVersions.V1, creator), auth_events, ) # Reject an event with no state key. with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _alias_event(RoomVersions.V1, creator, state_key=""), auth_events, ) # If the domain of the sender does not match the state key, reject. with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _alias_event(RoomVersions.V1, creator, state_key="test.com"), auth_events, ) # Note that the member does *not* need to be in the room. - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _alias_event(RoomVersions.V1, other), auth_events, ) @@ -175,24 +216,24 @@ class EventAuthTestCase(unittest.TestCase): ] # creator should be able to send aliases - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _alias_event(RoomVersions.V6, creator), auth_events, ) # No particular checks are done on the state key. - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _alias_event(RoomVersions.V6, creator, state_key=""), auth_events, ) - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _alias_event(RoomVersions.V6, creator, state_key="test.com"), auth_events, ) # Per standard auth rules, the member must be in the room. with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _alias_event(RoomVersions.V6, other), auth_events, ) @@ -220,12 +261,12 @@ class EventAuthTestCase(unittest.TestCase): # on room V1, pleb should be able to modify the notifications power level. 
if allow_modification: - event_auth.check_auth_rules_for_event(pl_event, auth_events) + event_auth.check_state_dependent_auth_rules(pl_event, auth_events) else: # But an MSC2209 room rejects this change. with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event(pl_event, auth_events) + event_auth.check_state_dependent_auth_rules(pl_event, auth_events) def test_join_rules_public(self): """ @@ -243,14 +284,14 @@ class EventAuthTestCase(unittest.TestCase): } # Check join. - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V6, pleb), auth_events.values(), ) # A user cannot be force-joined to a room. with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _member_event(RoomVersions.V6, pleb, "join", sender=creator), auth_events.values(), ) @@ -260,7 +301,7 @@ class EventAuthTestCase(unittest.TestCase): RoomVersions.V6, pleb, "ban" ) with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V6, pleb), auth_events.values(), ) @@ -269,7 +310,7 @@ class EventAuthTestCase(unittest.TestCase): auth_events[("m.room.member", pleb)] = _member_event( RoomVersions.V6, pleb, "leave" ) - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V6, pleb), auth_events.values(), ) @@ -278,7 +319,7 @@ class EventAuthTestCase(unittest.TestCase): auth_events[("m.room.member", pleb)] = _member_event( RoomVersions.V6, pleb, "join" ) - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V6, pleb), auth_events.values(), ) @@ -287,7 +328,7 @@ class EventAuthTestCase(unittest.TestCase): auth_events[("m.room.member", pleb)] = _member_event( RoomVersions.V6, pleb, "invite", sender=creator ) - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V6, pleb), auth_events.values(), ) @@ -309,14 +350,14 @@ class EventAuthTestCase(unittest.TestCase): # A join without an invite is rejected. with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V6, pleb), auth_events.values(), ) # A user cannot be force-joined to a room. 
with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _member_event(RoomVersions.V6, pleb, "join", sender=creator), auth_events.values(), ) @@ -326,7 +367,7 @@ class EventAuthTestCase(unittest.TestCase): RoomVersions.V6, pleb, "ban" ) with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V6, pleb), auth_events.values(), ) @@ -336,7 +377,7 @@ class EventAuthTestCase(unittest.TestCase): RoomVersions.V6, pleb, "leave" ) with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V6, pleb), auth_events.values(), ) @@ -345,7 +386,7 @@ class EventAuthTestCase(unittest.TestCase): auth_events[("m.room.member", pleb)] = _member_event( RoomVersions.V6, pleb, "join" ) - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V6, pleb), auth_events.values(), ) @@ -354,7 +395,7 @@ class EventAuthTestCase(unittest.TestCase): auth_events[("m.room.member", pleb)] = _member_event( RoomVersions.V6, pleb, "invite", sender=creator ) - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V6, pleb), auth_events.values(), ) @@ -376,7 +417,7 @@ class EventAuthTestCase(unittest.TestCase): } with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V6, pleb), auth_events.values(), ) @@ -413,7 +454,7 @@ class EventAuthTestCase(unittest.TestCase): EventContentFields.AUTHORISING_USER: "@creator:example.com" }, ) - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( authorised_join_event, auth_events.values(), ) @@ -429,7 +470,7 @@ class EventAuthTestCase(unittest.TestCase): pl_auth_events[("m.room.member", "@inviter:foo.test")] = _join_event( RoomVersions.V8, "@inviter:foo.test" ) - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event( RoomVersions.V8, pleb, @@ -442,7 +483,7 @@ class EventAuthTestCase(unittest.TestCase): # A join which is missing an authorised server is rejected. with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V8, pleb), auth_events.values(), ) @@ -455,7 +496,7 @@ class EventAuthTestCase(unittest.TestCase): {"invite": 100, "users": {"@other:example.com": 150}}, ) with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event( RoomVersions.V8, pleb, @@ -469,7 +510,7 @@ class EventAuthTestCase(unittest.TestCase): # A user cannot be force-joined to a room. (This uses an event which # *would* be valid, but is sent be a different user.) 
with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _member_event( RoomVersions.V8, pleb, @@ -487,7 +528,7 @@ class EventAuthTestCase(unittest.TestCase): RoomVersions.V8, pleb, "ban" ) with self.assertRaises(AuthError): - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( authorised_join_event, auth_events.values(), ) @@ -496,7 +537,7 @@ class EventAuthTestCase(unittest.TestCase): auth_events[("m.room.member", pleb)] = _member_event( RoomVersions.V8, pleb, "leave" ) - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( authorised_join_event, auth_events.values(), ) @@ -506,7 +547,7 @@ class EventAuthTestCase(unittest.TestCase): auth_events[("m.room.member", pleb)] = _member_event( RoomVersions.V8, pleb, "join" ) - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V8, pleb), auth_events.values(), ) @@ -516,7 +557,7 @@ class EventAuthTestCase(unittest.TestCase): auth_events[("m.room.member", pleb)] = _member_event( RoomVersions.V8, pleb, "invite", sender=creator ) - event_auth.check_auth_rules_for_event( + event_auth.check_state_dependent_auth_rules( _join_event(RoomVersions.V8, pleb), auth_events.values(), ) @@ -539,6 +580,7 @@ def _create_event( "state_key": "", "sender": user_id, "content": {"creator": user_id}, + "auth_events": [], }, room_version=room_version, ) @@ -559,6 +601,7 @@ def _member_event( "sender": sender or user_id, "state_key": user_id, "content": {"membership": membership, **(additional_content or {})}, + "auth_events": [], "prev_events": [], }, room_version=room_version, @@ -609,7 +652,22 @@ def _alias_event(room_version: RoomVersion, sender: str, **kwargs) -> EventBase: return make_event_from_dict(data, room_version=room_version) -def _random_state_event(room_version: RoomVersion, sender: str) -> EventBase: +def _build_auth_dict_for_room_version( + room_version: RoomVersion, auth_events: Iterable[EventBase] +) -> List: + if room_version.event_format == EventFormatVersions.V1: + return [(e.event_id, "not_used") for e in auth_events] + else: + return [e.event_id for e in auth_events] + + +def _random_state_event( + room_version: RoomVersion, + sender: str, + auth_events: Optional[Iterable[EventBase]] = None, +) -> EventBase: + if auth_events is None: + auth_events = [] return make_event_from_dict( { "room_id": TEST_ROOM_ID, @@ -618,6 +676,7 @@ def _random_state_event(room_version: RoomVersion, sender: str) -> EventBase: "sender": sender, "state_key": "", "content": {"membership": "join"}, + "auth_events": _build_auth_dict_for_room_version(room_version, auth_events), }, room_version=room_version, ) From ffe2464836dec7bbce2659b2b4e62eb956bf2a90 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Thu, 16 Jun 2022 10:31:10 +0100 Subject: [PATCH 006/178] Add instructions for running Complement with `gotestfmt`-formatted output locally. (#13073) --- changelog.d/13073.doc | 1 + docs/development/contributing_guide.md | 14 ++++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 changelog.d/13073.doc diff --git a/changelog.d/13073.doc b/changelog.d/13073.doc new file mode 100644 index 0000000000..e162a8404e --- /dev/null +++ b/changelog.d/13073.doc @@ -0,0 +1 @@ +Add instructions for running Complement with `gotestfmt`-formatted output locally. 
\ No newline at end of file diff --git a/docs/development/contributing_guide.md b/docs/development/contributing_guide.md index c2f04a3905..4738f8a6b6 100644 --- a/docs/development/contributing_guide.md +++ b/docs/development/contributing_guide.md @@ -310,6 +310,20 @@ The above will run a monolithic (single-process) Synapse with SQLite as the data - Passing `WORKERS=1` as an environment variable to use a workerised setup instead. This option implies the use of Postgres. +### Prettier formatting with `gotestfmt` + +If you want to format the output of the tests the same way as it looks in CI, +install [gotestfmt](https://github.com/haveyoudebuggedit/gotestfmt). + +You can then use this incantation to format the tests appropriately: + +```sh +COMPLEMENT_DIR=../complement ./scripts-dev/complement.sh -json | gotestfmt -hide successful-tests +``` + +(Remove `-hide successful-tests` if you don't want to hide successful tests.) + + ### Access database for homeserver after Complement test runs. If you're curious what the database looks like after you run some tests, here are some steps to get you going in Synapse: From 0ef1307619799bec2bbb96ce6516f307b0f8f217 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacek=20Ku=C5=9Bnierz?= Date: Thu, 16 Jun 2022 12:48:18 +0200 Subject: [PATCH 007/178] Add custom well-known (#13035) Co-authored-by: David Robertson --- changelog.d/13035.feature | 1 + .../configuration/config_documentation.md | 17 ++++++++++++++ synapse/config/server.py | 20 +++++++++++++++++ synapse/rest/well_known.py | 9 +++++++- tests/rest/test_well_known.py | 22 +++++++++++++++++++ 5 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13035.feature diff --git a/changelog.d/13035.feature b/changelog.d/13035.feature new file mode 100644 index 0000000000..cfca3ab4b7 --- /dev/null +++ b/changelog.d/13035.feature @@ -0,0 +1 @@ +Allow server admins to customise the response of the `/.well-known/matrix/client` endpoint. diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 7c9860c3e1..58a74ace48 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -230,6 +230,22 @@ Example configuration: serve_server_wellknown: true ``` --- +### `extra_well_known_client_content ` + +This option allows server runners to add arbitrary key-value pairs to the [client-facing `.well-known` response](https://spec.matrix.org/latest/client-server-api/#well-known-uri). +Note that the `public_baseurl` config option must be provided for Synapse to serve a response to `/.well-known/matrix/client` at all. + +If this option is provided, it parses the given yaml to json and +serves it on `/.well-known/matrix/client` endpoint +alongside the standard properties. + +Example configuration: +```yaml +extra_well_known_client_content : + option1: value1 + option2: value2 +``` +--- ### `soft_file_limit` Set the soft limit on the number of file descriptors synapse can use. 
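To make the merge rule described above concrete, here is a rough sketch (not the actual `WellKnownBuilder` code; the helper name is invented) of how the client well-known response is assembled: the standard `m.homeserver`/`m.identity_server` entries are written first, and `extra_well_known_client_content` keys are only added when they do not clash, so the standard entries cannot be overridden.

```python
# Illustrative only: shows the precedence between the standard well-known
# keys and extra_well_known_client_content. The function name is made up.
from typing import Any, Dict


def build_client_well_known(
    public_baseurl: str,
    extra_content: Dict[str, Any],
    default_identity_server: str = "",
) -> Dict[str, Any]:
    result: Dict[str, Any] = {"m.homeserver": {"base_url": public_baseurl}}
    if default_identity_server:
        result["m.identity_server"] = {"base_url": default_identity_server}
    for key, value in extra_content.items():
        if key not in result:  # never overwrite the standard keys
            result[key] = value
    return result


print(build_client_well_known("https://example.com/", {"option1": "value1", "option2": "value2"}))
# {'m.homeserver': {'base_url': 'https://example.com/'}, 'option1': 'value1', 'option2': 'value2'}
```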
@@ -3580,3 +3596,4 @@ background_updates: min_batch_size: 10 default_batch_size: 50 ``` + diff --git a/synapse/config/server.py b/synapse/config/server.py index 828938e5ec..085fe22c51 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -301,6 +301,26 @@ class ServerConfig(Config): "public_baseurl cannot contain query parameters or a #-fragment" ) + self.extra_well_known_client_content = config.get( + "extra_well_known_client_content", {} + ) + + if not isinstance(self.extra_well_known_client_content, dict): + raise ConfigError( + "extra_well_known_content must be a dictionary of key-value pairs" + ) + + if "m.homeserver" in self.extra_well_known_client_content: + raise ConfigError( + "m.homeserver is not supported in extra_well_known_content, " + "use public_baseurl in base config instead." + ) + if "m.identity_server" in self.extra_well_known_client_content: + raise ConfigError( + "m.identity_server is not supported in extra_well_known_content, " + "use default_identity_server in base config instead." + ) + # Whether to enable user presence. presence_config = config.get("presence") or {} self.use_presence = presence_config.get("enabled") diff --git a/synapse/rest/well_known.py b/synapse/rest/well_known.py index 04b035a1b1..6f7ac54c65 100644 --- a/synapse/rest/well_known.py +++ b/synapse/rest/well_known.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import logging from typing import TYPE_CHECKING, Optional @@ -44,6 +43,14 @@ class WellKnownBuilder: "base_url": self._config.registration.default_identity_server } + if self._config.server.extra_well_known_client_content: + for ( + key, + value, + ) in self._config.server.extra_well_known_client_content.items(): + if key not in result: + result[key] = value + return result diff --git a/tests/rest/test_well_known.py b/tests/rest/test_well_known.py index 11f78f52b8..d8faafec75 100644 --- a/tests/rest/test_well_known.py +++ b/tests/rest/test_well_known.py @@ -59,6 +59,28 @@ class WellKnownTests(unittest.HomeserverTestCase): self.assertEqual(channel.code, HTTPStatus.NOT_FOUND) + @unittest.override_config( + { + "public_baseurl": "https://tesths", + "default_identity_server": "https://testis", + "extra_well_known_client_content": {"custom": False}, + } + ) + def test_client_well_known_custom(self) -> None: + channel = self.make_request( + "GET", "/.well-known/matrix/client", shorthand=False + ) + + self.assertEqual(channel.code, HTTPStatus.OK) + self.assertEqual( + channel.json_body, + { + "m.homeserver": {"base_url": "https://tesths/"}, + "m.identity_server": {"base_url": "https://testis"}, + "custom": False, + }, + ) + @unittest.override_config({"serve_server_wellknown": True}) def test_server_well_known(self) -> None: channel = self.make_request( From 1e0044e8f9f20ee0e8be9ad40c48be3a67e0f54e Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 16 Jun 2022 12:12:26 +0100 Subject: [PATCH 008/178] Complement: use SQLite by default (#13075) If no database is configured explicitly, use sqlite. This means that you don't have to pass any variables into the image. 
--- changelog.d/13075.misc | 1 + docker/complement/README.md | 2 +- docker/complement/conf/start_for_complement.sh | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13075.misc diff --git a/changelog.d/13075.misc b/changelog.d/13075.misc new file mode 100644 index 0000000000..2311629f7b --- /dev/null +++ b/changelog.d/13075.misc @@ -0,0 +1 @@ +Merge the Complement testing Docker images into a single, multi-purpose image. diff --git a/docker/complement/README.md b/docker/complement/README.md index 37c39e2dfc..62682219e8 100644 --- a/docker/complement/README.md +++ b/docker/complement/README.md @@ -7,7 +7,7 @@ so **please don't use this image for a production server**. This multi-purpose image is built on top of `Dockerfile-workers` in the parent directory and can be switched using environment variables between the following configurations: -- Monolithic Synapse with SQLite (`SYNAPSE_COMPLEMENT_DATABASE=sqlite`) +- Monolithic Synapse with SQLite (default, or `SYNAPSE_COMPLEMENT_DATABASE=sqlite`) - Monolithic Synapse with Postgres (`SYNAPSE_COMPLEMENT_DATABASE=postgres`) - Workerised Synapse with Postgres (`SYNAPSE_COMPLEMENT_DATABASE=postgres` and `SYNAPSE_COMPLEMENT_USE_WORKERS=true`) diff --git a/docker/complement/conf/start_for_complement.sh b/docker/complement/conf/start_for_complement.sh index b9c97ab687..65da99b8da 100755 --- a/docker/complement/conf/start_for_complement.sh +++ b/docker/complement/conf/start_for_complement.sh @@ -31,7 +31,7 @@ case "$SYNAPSE_COMPLEMENT_DATABASE" in export START_POSTGRES=true ;; - sqlite) + sqlite|"") # Configure supervisord not to start Postgres, as we don't need it export START_POSTGRES=false ;; From 755261524728c4a8c3f48c91590bdaef0731651e Mon Sep 17 00:00:00 2001 From: reivilibre Date: Thu, 16 Jun 2022 12:40:29 +0100 Subject: [PATCH 009/178] Reduce the duplication of code that invokes the rate limiter. (#13070) --- changelog.d/13070.misc | 1 + synapse/handlers/room_member.py | 30 +++--------------------------- 2 files changed, 4 insertions(+), 27 deletions(-) create mode 100644 changelog.d/13070.misc diff --git a/changelog.d/13070.misc b/changelog.d/13070.misc new file mode 100644 index 0000000000..ce1f14342d --- /dev/null +++ b/changelog.d/13070.misc @@ -0,0 +1 @@ +Reduce the duplication of code that invokes the rate limiter. \ No newline at end of file diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index e89b7441ad..bf6bae1232 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -26,13 +26,7 @@ from synapse.api.constants import ( GuestAccess, Membership, ) -from synapse.api.errors import ( - AuthError, - Codes, - LimitExceededError, - ShadowBanError, - SynapseError, -) +from synapse.api.errors import AuthError, Codes, ShadowBanError, SynapseError from synapse.api.ratelimiting import Ratelimiter from synapse.event_auth import get_named_level, get_power_level_event from synapse.events import EventBase @@ -380,16 +374,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): # Only rate-limit if the user actually joined the room, otherwise we'll end # up blocking profile updates. 
if newly_joined and ratelimit: - time_now_s = self.clock.time() - ( - allowed, - time_allowed, - ) = await self._join_rate_limiter_local.can_do_action(requester) - - if not allowed: - raise LimitExceededError( - retry_after_ms=int(1000 * (time_allowed - time_now_s)) - ) + await self._join_rate_limiter_local.ratelimit(requester) result_event = await self.event_creation_handler.handle_new_client_event( requester, @@ -835,19 +820,10 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): ) if remote_join: if ratelimit: - time_now_s = self.clock.time() - ( - allowed, - time_allowed, - ) = await self._join_rate_limiter_remote.can_do_action( + await self._join_rate_limiter_remote.ratelimit( requester, ) - if not allowed: - raise LimitExceededError( - retry_after_ms=int(1000 * (time_allowed - time_now_s)) - ) - inviter = await self._get_inviter(target.to_string(), room_id) if inviter and not self.hs.is_mine(inviter): remote_room_hosts.append(inviter.domain) From 0fcc0ae37c959116c910f349a8025bd6921fdfc8 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 16 Jun 2022 07:41:57 -0400 Subject: [PATCH 010/178] Improve URL previews for sites with only Twitter card information. (#13056) Pull out `twitter:` meta tags when generating a preview and use it to augment any `og:` meta tags. Prefers Open Graph information over Twitter card information. --- changelog.d/13056.feature | 1 + synapse/rest/media/v1/preview_html.py | 112 +++++++++++++++++++---- tests/rest/media/v1/test_html_preview.py | 41 +++++++++ 3 files changed, 137 insertions(+), 17 deletions(-) create mode 100644 changelog.d/13056.feature diff --git a/changelog.d/13056.feature b/changelog.d/13056.feature new file mode 100644 index 0000000000..219e2f6c1e --- /dev/null +++ b/changelog.d/13056.feature @@ -0,0 +1 @@ +Improve URL previews for sites which only provide Twitter Card metadata, e.g. LWN.net. diff --git a/synapse/rest/media/v1/preview_html.py b/synapse/rest/media/v1/preview_html.py index ed8f21a483..c826a13093 100644 --- a/synapse/rest/media/v1/preview_html.py +++ b/synapse/rest/media/v1/preview_html.py @@ -15,7 +15,16 @@ import codecs import itertools import logging import re -from typing import TYPE_CHECKING, Dict, Generator, Iterable, Optional, Set, Union +from typing import ( + TYPE_CHECKING, + Callable, + Dict, + Generator, + Iterable, + Optional, + Set, + Union, +) if TYPE_CHECKING: from lxml import etree @@ -146,6 +155,70 @@ def decode_body( return etree.fromstring(body, parser) +def _get_meta_tags( + tree: "etree.Element", + property: str, + prefix: str, + property_mapper: Optional[Callable[[str], Optional[str]]] = None, +) -> Dict[str, Optional[str]]: + """ + Search for meta tags prefixed with a particular string. + + Args: + tree: The parsed HTML document. + property: The name of the property which contains the tag name, e.g. + "property" for Open Graph. + prefix: The prefix on the property to search for, e.g. "og" for Open Graph. + property_mapper: An optional callable to map the property to the Open Graph + form. Can return None for a key to ignore that key. + + Returns: + A map of tag name to value. 
+ """ + results: Dict[str, Optional[str]] = {} + for tag in tree.xpath( + f"//*/meta[starts-with(@{property}, '{prefix}:')][@content][not(@content='')]" + ): + # if we've got more than 50 tags, someone is taking the piss + if len(results) >= 50: + logger.warning( + "Skipping parsing of Open Graph for page with too many '%s:' tags", + prefix, + ) + return {} + + key = tag.attrib[property] + if property_mapper: + key = property_mapper(key) + # None is a special value used to ignore a value. + if key is None: + continue + + results[key] = tag.attrib["content"] + + return results + + +def _map_twitter_to_open_graph(key: str) -> Optional[str]: + """ + Map a Twitter card property to the analogous Open Graph property. + + Args: + key: The Twitter card property (starts with "twitter:"). + + Returns: + The Open Graph property (starts with "og:") or None to have this property + be ignored. + """ + # Twitter card properties with no analogous Open Graph property. + if key == "twitter:card" or key == "twitter:creator": + return None + if key == "twitter:site": + return "og:site_name" + # Otherwise, swap twitter to og. + return "og" + key[7:] + + def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]: """ Parse the HTML document into an Open Graph response. @@ -160,10 +233,8 @@ def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]: The Open Graph response as a dictionary. """ - # if we see any image URLs in the OG response, then spider them - # (although the client could choose to do this by asking for previews of those - # URLs to avoid DoSing the server) - + # Search for Open Graph (og:) meta tags, e.g.: + # # "og:type" : "video", # "og:url" : "https://www.youtube.com/watch?v=LXDBoHyjmtw", # "og:site_name" : "YouTube", @@ -176,19 +247,11 @@ def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]: # "og:video:height" : "720", # "og:video:secure_url": "https://www.youtube.com/v/LXDBoHyjmtw?version=3", - og: Dict[str, Optional[str]] = {} - for tag in tree.xpath( - "//*/meta[starts-with(@property, 'og:')][@content][not(@content='')]" - ): - # if we've got more than 50 tags, someone is taking the piss - if len(og) >= 50: - logger.warning("Skipping OG for page with too many 'og:' tags") - return {} - - og[tag.attrib["property"]] = tag.attrib["content"] - - # TODO: grab article: meta tags too, e.g.: + og = _get_meta_tags(tree, "property", "og") + # TODO: Search for properties specific to the different Open Graph types, + # such as article: meta tags, e.g.: + # # "article:publisher" : "https://www.facebook.com/thethudonline" /> # "article:author" content="https://www.facebook.com/thethudonline" /> # "article:tag" content="baby" /> @@ -196,6 +259,21 @@ def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]: # "article:published_time" content="2016-03-31T19:58:24+00:00" /> # "article:modified_time" content="2016-04-01T18:31:53+00:00" /> + # Search for Twitter Card (twitter:) meta tags, e.g.: + # + # "twitter:site" : "@matrixdotorg" + # "twitter:creator" : "@matrixdotorg" + # + # Twitter cards tags also duplicate Open Graph tags. + # + # See https://developer.twitter.com/en/docs/twitter-for-websites/cards/guides/getting-started + twitter = _get_meta_tags(tree, "name", "twitter", _map_twitter_to_open_graph) + # Merge the Twitter values with the Open Graph values, but do not overwrite + # information from Open Graph tags. 
+ for key, value in twitter.items(): + if key not in og: + og[key] = value + if "og:title" not in og: # Attempt to find a title from the title tag, or the biggest header on the page. title = tree.xpath("((//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1])/text()") diff --git a/tests/rest/media/v1/test_html_preview.py b/tests/rest/media/v1/test_html_preview.py index ea9e5889bf..cbdf210aef 100644 --- a/tests/rest/media/v1/test_html_preview.py +++ b/tests/rest/media/v1/test_html_preview.py @@ -370,6 +370,47 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase): og = parse_html_to_open_graph(tree) self.assertEqual(og, {"og:title": "ó", "og:description": "Some text."}) + def test_twitter_tag(self) -> None: + """Twitter card tags should be used if nothing else is available.""" + html = b""" + + + + + + """ + tree = decode_body(html, "http://example.com/test.html") + og = parse_html_to_open_graph(tree) + self.assertEqual( + og, + { + "og:title": None, + "og:description": "Description", + "og:site_name": "@matrixdotorg", + }, + ) + + # But they shouldn't override Open Graph values. + html = b""" + + + + + + + + """ + tree = decode_body(html, "http://example.com/test.html") + og = parse_html_to_open_graph(tree) + self.assertEqual( + og, + { + "og:title": None, + "og:description": "Real Description", + "og:site_name": "matrix.org", + }, + ) + class MediaEncodingTestCase(unittest.TestCase): def test_meta_charset(self) -> None: From 90cadcd403a5652a3f789ccfa8b608c639c0cc6d Mon Sep 17 00:00:00 2001 From: reivilibre Date: Thu, 16 Jun 2022 12:43:21 +0100 Subject: [PATCH 011/178] Add a Subject Alternative Name to the certificate generated for Complement tests. (#13071) --- changelog.d/13071.misc | 1 + .../complement/conf/start_for_complement.sh | 22 ++++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 changelog.d/13071.misc diff --git a/changelog.d/13071.misc b/changelog.d/13071.misc new file mode 100644 index 0000000000..a6e1e6b3a8 --- /dev/null +++ b/changelog.d/13071.misc @@ -0,0 +1 @@ +Add a Subject Alternative Name to the certificate generated for Complement tests. \ No newline at end of file diff --git a/docker/complement/conf/start_for_complement.sh b/docker/complement/conf/start_for_complement.sh index 65da99b8da..773c7db22f 100755 --- a/docker/complement/conf/start_for_complement.sh +++ b/docker/complement/conf/start_for_complement.sh @@ -73,14 +73,30 @@ fi # Generate a TLS key, then generate a certificate by having Complement's CA sign it # Note that both the key and certificate are in PEM format (not DER). + +# First generate a configuration file to set up a Subject Alternative Name. +cat > /conf/server.tls.conf < Date: Thu, 16 Jun 2022 18:50:46 +0100 Subject: [PATCH 012/178] Add desc to `get_earliest_token_for_stats` (#13085) --- changelog.d/13085.misc | 1 + synapse/storage/databases/main/stats.py | 1 + 2 files changed, 2 insertions(+) create mode 100644 changelog.d/13085.misc diff --git a/changelog.d/13085.misc b/changelog.d/13085.misc new file mode 100644 index 0000000000..2401d4f388 --- /dev/null +++ b/changelog.d/13085.misc @@ -0,0 +1 @@ +Correctly report prometheus DB stats for `get_earliest_token_for_stats`. 
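For context on the one-line change below: the `desc` argument gives the query a stable name, and (as the changelog notes) that name is what the per-query database statistics are reported under. A toy sketch of the idea follows — it is not Synapse's metrics code, just an illustration of labelling query timings by description:

```python
# Toy illustration of accumulating query timings under a description label;
# the real accounting is done by Synapse's database layer, not this helper.
import time
from collections import defaultdict
from typing import Any, Callable, Dict

query_seconds: Dict[str, float] = defaultdict(float)


def timed_query(desc: str, query: Callable[[], Any]) -> Any:
    start = time.monotonic()
    try:
        return query()
    finally:
        query_seconds[desc] += time.monotonic() - start


timed_query("get_earliest_token_for_stats", lambda: sum(range(1000)))
print(dict(query_seconds))
```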
diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py index 538451b05f..82851ffa95 100644 --- a/synapse/storage/databases/main/stats.py +++ b/synapse/storage/databases/main/stats.py @@ -295,6 +295,7 @@ class StatsStore(StateDeltasStore): keyvalues={id_col: id}, retcol="completed_delta_stream_id", allow_none=True, + desc="get_earliest_token_for_stats", ) async def bulk_update_stats_delta( From 9372f6f842e3f8c0166d68a3a49ccc73a76954ea Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Fri, 17 Jun 2022 10:22:50 +0100 Subject: [PATCH 013/178] Fix logging context misuse when we fail to persist a federation event (#13089) When we fail to persist a federation event, we kick off a task to remove its push actions in the background, using the current logging context. Since we don't `await` that task, we may finish our logging context before the task finishes. There's no reason to not `await` the task, so let's do that. Signed-off-by: Sean Quah --- changelog.d/13089.misc | 1 + synapse/handlers/federation_event.py | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) create mode 100644 changelog.d/13089.misc diff --git a/changelog.d/13089.misc b/changelog.d/13089.misc new file mode 100644 index 0000000000..5868507cb7 --- /dev/null +++ b/changelog.d/13089.misc @@ -0,0 +1 @@ +Fix a long-standing bug where a finished logging context would be re-started when Synapse failed to persist an event from federation. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 565ffd7cfd..b7c54e642f 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -57,7 +57,7 @@ from synapse.event_auth import ( from synapse.events import EventBase from synapse.events.snapshot import EventContext from synapse.federation.federation_client import InvalidResponseError -from synapse.logging.context import nested_logging_context, run_in_background +from synapse.logging.context import nested_logging_context from synapse.metrics.background_process_metrics import run_as_background_process from synapse.replication.http.devices import ReplicationUserDevicesResyncRestServlet from synapse.replication.http.federation import ( @@ -1964,9 +1964,7 @@ class FederationEventHandler: event.room_id, [(event, context)], backfilled=backfilled ) except Exception: - run_in_background( - self._store.remove_push_actions_from_staging, event.event_id - ) + await self._store.remove_push_actions_from_staging(event.event_id) raise async def persist_events_and_notify( From c6d617641186221829c644204f24654430858826 Mon Sep 17 00:00:00 2001 From: Quentin Gliech Date: Fri, 17 Jun 2022 12:39:26 +0200 Subject: [PATCH 014/178] Allow MSC3030 'timestamp_to_event' calls from anyone on world-readable rooms. (#13062) Signed-off-by: Quentin Gliech --- changelog.d/13062.misc | 1 + synapse/rest/client/room.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13062.misc diff --git a/changelog.d/13062.misc b/changelog.d/13062.misc new file mode 100644 index 0000000000..d425e9a9ac --- /dev/null +++ b/changelog.d/13062.misc @@ -0,0 +1 @@ +Allow MSC3030 'timestamp_to_event' calls from anyone on world-readable rooms. 
diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index a26e976492..2f513164cb 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -1177,7 +1177,9 @@ class TimestampLookupRestServlet(RestServlet): self, request: SynapseRequest, room_id: str ) -> Tuple[int, JsonDict]: requester = await self._auth.get_user_by_req(request) - await self._auth.check_user_in_room(room_id, requester.user.to_string()) + await self._auth.check_user_in_room_or_world_readable( + room_id, requester.user.to_string() + ) timestamp = parse_integer(request, "ts", required=True) direction = parse_string(request, "dir", default="f", allowed_values=["f", "b"]) From 5099b5ecc735b98ac9d559ef6191554bafff964b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 17 Jun 2022 11:42:03 +0100 Subject: [PATCH 015/178] Use new `device_list_changes_in_room` table when getting device list changes (#13045) --- changelog.d/13045.feature | 1 + synapse/handlers/device.py | 69 +++++++++++++++++------ synapse/handlers/sync.py | 19 ++----- synapse/storage/databases/main/devices.py | 59 +++++++++++++++++++ 4 files changed, 117 insertions(+), 31 deletions(-) create mode 100644 changelog.d/13045.feature diff --git a/changelog.d/13045.feature b/changelog.d/13045.feature new file mode 100644 index 0000000000..7b0667ba95 --- /dev/null +++ b/changelog.d/13045.feature @@ -0,0 +1 @@ +Speed up fetching of device list changes in `/sync` and `/keys/changes`. diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index b79c551703..c05a170c55 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -123,6 +123,43 @@ class DeviceWorkerHandler: return device + async def get_device_changes_in_shared_rooms( + self, user_id: str, room_ids: Collection[str], from_token: StreamToken + ) -> Collection[str]: + """Get the set of users whose devices have changed who share a room with + the given user. + """ + changed_users = await self.store.get_device_list_changes_in_rooms( + room_ids, from_token.device_list_key + ) + + if changed_users is not None: + # We also check if the given user has changed their device. If + # they're in no rooms then the above query won't include them. + changed = await self.store.get_users_whose_devices_changed( + from_token.device_list_key, [user_id] + ) + changed_users.update(changed) + return changed_users + + # If the DB returned None then the `from_token` is too old, so we fall + # back on looking for device updates for all users. + + users_who_share_room = await self.store.get_users_who_share_room_with_user( + user_id + ) + + tracked_users = set(users_who_share_room) + + # Always tell the user about their own devices + tracked_users.add(user_id) + + changed = await self.store.get_users_whose_devices_changed( + from_token.device_list_key, tracked_users + ) + + return changed + @trace @measure_func("device.get_user_ids_changed") async def get_user_ids_changed( @@ -138,19 +175,8 @@ class DeviceWorkerHandler: room_ids = await self.store.get_rooms_for_user(user_id) - # First we check if any devices have changed for users that we share - # rooms with. 
- users_who_share_room = await self.store.get_users_who_share_room_with_user( - user_id - ) - - tracked_users = set(users_who_share_room) - - # Always tell the user about their own devices - tracked_users.add(user_id) - - changed = await self.store.get_users_whose_devices_changed( - from_token.device_list_key, tracked_users + changed = await self.get_device_changes_in_shared_rooms( + user_id, room_ids, from_token ) # Then work out if any users have since joined @@ -237,10 +263,19 @@ class DeviceWorkerHandler: break if possibly_changed or possibly_left: - # Take the intersection of the users whose devices may have changed - # and those that actually still share a room with the user - possibly_joined = possibly_changed & users_who_share_room - possibly_left = (possibly_changed | possibly_left) - users_who_share_room + possibly_joined = possibly_changed + possibly_left = possibly_changed | possibly_left + + # Double check if we still share rooms with the given user. + users_rooms = await self.store.get_rooms_for_users_with_stream_ordering( + possibly_left + ) + for changed_user_id, entries in users_rooms.items(): + if any(e.room_id in room_ids for e in entries): + possibly_left.discard(changed_user_id) + else: + possibly_joined.discard(changed_user_id) + else: possibly_joined = set() possibly_left = set() diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 6ad053f678..d42a414c90 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -240,6 +240,7 @@ class SyncHandler: self.auth_blocking = hs.get_auth_blocking() self._storage_controllers = hs.get_storage_controllers() self._state_storage_controller = self._storage_controllers.state + self._device_handler = hs.get_device_handler() # TODO: flush cache entries on subsequent sync request. # Once we get the next /sync request (ie, one with the same access token @@ -1268,21 +1269,11 @@ class SyncHandler: ): users_that_have_changed.add(changed_user_id) else: - users_who_share_room = ( - await self.store.get_users_who_share_room_with_user(user_id) - ) - - # Always tell the user about their own devices. We check as the user - # ID is almost certainly already included (unless they're not in any - # rooms) and taking a copy of the set is relatively expensive. 
- if user_id not in users_who_share_room: - users_who_share_room = set(users_who_share_room) - users_who_share_room.add(user_id) - - tracked_users = users_who_share_room users_that_have_changed = ( - await self.store.get_users_whose_devices_changed( - since_token.device_list_key, tracked_users + await self._device_handler.get_device_changes_in_shared_rooms( + user_id, + sync_result_builder.joined_room_ids, + from_token=since_token, ) ) diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 03d1334e03..93d980786e 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1208,6 +1208,65 @@ class DeviceWorkerStore(EndToEndKeyWorkerStore): return devices + @cached() + async def _get_min_device_lists_changes_in_room(self) -> int: + """Returns the minimum stream ID that we have entries for + `device_lists_changes_in_room` + """ + + return await self.db_pool.simple_select_one_onecol( + table="device_lists_changes_in_room", + keyvalues={}, + retcol="COALESCE(MIN(stream_id), 0)", + desc="get_min_device_lists_changes_in_room", + ) + + async def get_device_list_changes_in_rooms( + self, room_ids: Collection[str], from_id: int + ) -> Optional[Set[str]]: + """Return the set of users whose devices have changed in the given rooms + since the given stream ID. + + Returns None if the given stream ID is too old. + """ + + if not room_ids: + return set() + + min_stream_id = await self._get_min_device_lists_changes_in_room() + + if min_stream_id > from_id: + return None + + sql = """ + SELECT DISTINCT user_id FROM device_lists_changes_in_room + WHERE {clause} AND stream_id >= ? + """ + + def _get_device_list_changes_in_rooms_txn( + txn: LoggingTransaction, + clause, + args, + ) -> Set[str]: + txn.execute(sql.format(clause=clause), args) + return {user_id for user_id, in txn} + + changes = set() + for chunk in batch_iter(room_ids, 1000): + clause, args = make_in_list_sql_clause( + self.database_engine, "room_id", chunk + ) + args.append(from_id) + + changes |= await self.db_pool.runInteraction( + "get_device_list_changes_in_rooms", + _get_device_list_changes_in_rooms_txn, + clause, + args, + ) + + return changes + class DeviceBackgroundUpdateStore(SQLBaseStore): def __init__( From 5ef05c70c30ec06376c48f443c5722fbf5dd2aa0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 17 Jun 2022 11:58:00 +0100 Subject: [PATCH 016/178] Rotate notifications more frequently (#13096) --- changelog.d/13096.misc | 1 + synapse/storage/databases/main/event_push_actions.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13096.misc diff --git a/changelog.d/13096.misc b/changelog.d/13096.misc new file mode 100644 index 0000000000..3bb51962e7 --- /dev/null +++ b/changelog.d/13096.misc @@ -0,0 +1 @@ +Reduce DB usage of `/sync` when a large number of unread messages have recently been sent in a room. 
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index ae705889a5..10a7962382 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -148,7 +148,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas self._doing_notif_rotation = False if hs.config.worker.run_background_tasks: self._rotate_notif_loop = self._clock.looping_call( - self._rotate_notifs, 30 * 60 * 1000 + self._rotate_notifs, 30 * 1000 ) self.db_pool.updates.register_background_index_update( From 73af10f419346a5f2d70131ac1ed8e69942edca0 Mon Sep 17 00:00:00 2001 From: Quentin Gliech Date: Fri, 17 Jun 2022 13:19:22 +0200 Subject: [PATCH 017/178] Simplify the alias deletion logic as an application service. (#13093) --- changelog.d/13093.misc | 1 + synapse/rest/client/directory.py | 35 +++++++++++------------------ tests/rest/client/test_directory.py | 34 ++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 22 deletions(-) create mode 100644 changelog.d/13093.misc diff --git a/changelog.d/13093.misc b/changelog.d/13093.misc new file mode 100644 index 0000000000..2547c87fa4 --- /dev/null +++ b/changelog.d/13093.misc @@ -0,0 +1 @@ +Simplify the alias deletion logic as an application service. diff --git a/synapse/rest/client/directory.py b/synapse/rest/client/directory.py index e181a0dde2..9639d4fe2c 100644 --- a/synapse/rest/client/directory.py +++ b/synapse/rest/client/directory.py @@ -17,13 +17,7 @@ from typing import TYPE_CHECKING, Tuple from twisted.web.server import Request -from synapse.api.errors import ( - AuthError, - Codes, - InvalidClientCredentialsError, - NotFoundError, - SynapseError, -) +from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_json_object_from_request from synapse.http.site import SynapseRequest @@ -96,30 +90,27 @@ class ClientDirectoryServer(RestServlet): self, request: SynapseRequest, room_alias: str ) -> Tuple[int, JsonDict]: room_alias_obj = RoomAlias.from_string(room_alias) + requester = await self.auth.get_user_by_req(request) - try: - service = self.auth.get_appservice_by_req(request) + if requester.app_service: await self.directory_handler.delete_appservice_association( - service, room_alias_obj + requester.app_service, room_alias_obj ) + logger.info( "Application service at %s deleted alias %s", - service.url, + requester.app_service.url, room_alias_obj.to_string(), ) - return 200, {} - except InvalidClientCredentialsError: - # fallback to default user behaviour if they aren't an AS - pass - requester = await self.auth.get_user_by_req(request) - user = requester.user + else: + await self.directory_handler.delete_association(requester, room_alias_obj) - await self.directory_handler.delete_association(requester, room_alias_obj) - - logger.info( - "User %s deleted alias %s", user.to_string(), room_alias_obj.to_string() - ) + logger.info( + "User %s deleted alias %s", + requester.user.to_string(), + room_alias_obj.to_string(), + ) return 200, {} diff --git a/tests/rest/client/test_directory.py b/tests/rest/client/test_directory.py index aca03afd0e..67473a68d7 100644 --- a/tests/rest/client/test_directory.py +++ b/tests/rest/client/test_directory.py @@ -16,6 +16,7 @@ from http import HTTPStatus from twisted.test.proto_helpers import MemoryReactor +from synapse.appservice import ApplicationService from 
synapse.rest import admin from synapse.rest.client import directory, login, room from synapse.server import HomeServer @@ -129,6 +130,39 @@ class DirectoryTestCase(unittest.HomeserverTestCase): ) self.assertEqual(channel.code, HTTPStatus.OK, channel.result) + def test_deleting_alias_via_directory_appservice(self) -> None: + user_id = "@as:test" + as_token = "i_am_an_app_service" + + appservice = ApplicationService( + as_token, + id="1234", + namespaces={"aliases": [{"regex": "#asns-*", "exclusive": True}]}, + sender=user_id, + ) + self.hs.get_datastores().main.services_cache.append(appservice) + + # Add an alias for the room, as the appservice + alias = RoomAlias(f"asns-{random_string(5)}", self.hs.hostname).to_string() + data = {"room_id": self.room_id} + request_data = json.dumps(data) + + channel = self.make_request( + "PUT", + f"/_matrix/client/r0/directory/room/{alias}", + request_data, + access_token=as_token, + ) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) + + # Then try to remove the alias, as the appservice + channel = self.make_request( + "DELETE", + f"/_matrix/client/r0/directory/room/{alias}", + access_token=as_token, + ) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) + def test_deleting_nonexistant_alias(self) -> None: # Check that no alias exists alias = "#potato:test" From 5d6f55959e8dfdfa194fd1ea955ef714114e5a71 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 17 Jun 2022 12:47:22 +0100 Subject: [PATCH 018/178] Update info on downstream debs (#13095) --- changelog.d/13095.doc | 1 + docs/setup/installation.md | 17 ++++++++--------- 2 files changed, 9 insertions(+), 9 deletions(-) create mode 100644 changelog.d/13095.doc diff --git a/changelog.d/13095.doc b/changelog.d/13095.doc new file mode 100644 index 0000000000..4651f25e14 --- /dev/null +++ b/changelog.d/13095.doc @@ -0,0 +1 @@ +Update information on downstream Debian packages. diff --git a/docs/setup/installation.md b/docs/setup/installation.md index 69ade036c3..5bdefe2bc1 100644 --- a/docs/setup/installation.md +++ b/docs/setup/installation.md @@ -84,20 +84,19 @@ file when you upgrade the Debian package to a later version. ##### Downstream Debian packages -We do not recommend using the packages from the default Debian `buster` -repository at this time, as they are old and suffer from known security -vulnerabilities. You can install the latest version of Synapse from -[our repository](#matrixorg-packages) or from `buster-backports`. Please -see the [Debian documentation](https://backports.debian.org/Instructions/) -for information on how to use backports. - -If you are using Debian `sid` or testing, Synapse is available in the default -repositories and it should be possible to install it simply with: +Andrej Shadura maintains a `matrix-synapse` package in the Debian repositories. +For `bookworm` and `sid`, it can be installed simply with: ```sh sudo apt install matrix-synapse ``` +Synapse is also avaliable in `bullseye-backports`. Please +see the [Debian documentation](https://backports.debian.org/Instructions/) +for information on how to use backports. + +`matrix-synapse` is no longer maintained for `buster` and older. 
+ ##### Downstream Ubuntu packages We do not recommend using the packages in the default Ubuntu repository From b26cbe3d4573c22b8a1743ae65db4f61770e69e9 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Fri, 17 Jun 2022 13:05:27 +0100 Subject: [PATCH 019/178] Fix type error that made its way onto develop (#13098) * Fix type error introduced accidentally by #13045 * Newsfile Signed-off-by: Olivier Wilkinson (reivilibre) --- changelog.d/13098.feature | 1 + synapse/storage/databases/main/devices.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13098.feature diff --git a/changelog.d/13098.feature b/changelog.d/13098.feature new file mode 100644 index 0000000000..7b0667ba95 --- /dev/null +++ b/changelog.d/13098.feature @@ -0,0 +1 @@ +Speed up fetching of device list changes in `/sync` and `/keys/changes`. diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 93d980786e..adde5d0978 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1245,8 +1245,8 @@ class DeviceWorkerStore(EndToEndKeyWorkerStore): def _get_device_list_changes_in_rooms_txn( txn: LoggingTransaction, - clause, - args, + clause: str, + args: List[Any], ) -> Set[str]: txn.execute(sql.format(clause=clause), args) return {user_id for user_id, in txn} From d3d84685ce1acc05cbec00d2934548473850f9d0 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 17 Jun 2022 08:38:13 -0400 Subject: [PATCH 020/178] Add type hints to event push actions tests. (#13099) --- changelog.d/12985.misc | 2 +- changelog.d/13099.misc | 1 + tests/storage/test_event_push_actions.py | 28 ++++++++++++++---------- 3 files changed, 19 insertions(+), 12 deletions(-) create mode 100644 changelog.d/13099.misc diff --git a/changelog.d/12985.misc b/changelog.d/12985.misc index d5ab9eedea..7f6492d587 100644 --- a/changelog.d/12985.misc +++ b/changelog.d/12985.misc @@ -1 +1 @@ -Add type annotations to `tests.state.test_v2`. +Add type hints to tests. diff --git a/changelog.d/13099.misc b/changelog.d/13099.misc new file mode 100644 index 0000000000..7f6492d587 --- /dev/null +++ b/changelog.d/13099.misc @@ -0,0 +1 @@ +Add type hints to tests. 
diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index 4273524c4c..2ac5f6db5e 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -14,7 +14,11 @@ from unittest.mock import Mock +from twisted.test.proto_helpers import MemoryReactor + +from synapse.server import HomeServer from synapse.storage.databases.main.event_push_actions import NotifCounts +from synapse.util import Clock from tests.unittest import HomeserverTestCase @@ -29,31 +33,33 @@ HIGHLIGHT = [ class EventPushActionsStoreTestCase(HomeserverTestCase): - def prepare(self, reactor, clock, hs): + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.store = hs.get_datastores().main - self.persist_events_store = hs.get_datastores().persist_events + persist_events_store = hs.get_datastores().persist_events + assert persist_events_store is not None + self.persist_events_store = persist_events_store - def test_get_unread_push_actions_for_user_in_range_for_http(self): + def test_get_unread_push_actions_for_user_in_range_for_http(self) -> None: self.get_success( self.store.get_unread_push_actions_for_user_in_range_for_http( USER_ID, 0, 1000, 20 ) ) - def test_get_unread_push_actions_for_user_in_range_for_email(self): + def test_get_unread_push_actions_for_user_in_range_for_email(self) -> None: self.get_success( self.store.get_unread_push_actions_for_user_in_range_for_email( USER_ID, 0, 1000, 20 ) ) - def test_count_aggregation(self): + def test_count_aggregation(self) -> None: room_id = "!foo:example.com" user_id = "@user1235:example.com" last_read_stream_ordering = [0] - def _assert_counts(noitf_count, highlight_count): + def _assert_counts(noitf_count: int, highlight_count: int) -> None: counts = self.get_success( self.store.db_pool.runInteraction( "", @@ -72,7 +78,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): ), ) - def _inject_actions(stream, action): + def _inject_actions(stream: int, action: list) -> None: event = Mock() event.room_id = room_id event.event_id = "$test:example.com" @@ -96,14 +102,14 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): ) ) - def _rotate(stream): + def _rotate(stream: int) -> None: self.get_success( self.store.db_pool.runInteraction( "", self.store._rotate_notifs_before_txn, stream ) ) - def _mark_read(stream, depth): + def _mark_read(stream: int, depth: int) -> None: last_read_stream_ordering[0] = stream self.get_success( self.store.db_pool.runInteraction( @@ -165,8 +171,8 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): _mark_read(10, 10) _assert_counts(0, 0) - def test_find_first_stream_ordering_after_ts(self): - def add_event(so, ts): + def test_find_first_stream_ordering_after_ts(self) -> None: + def add_event(so: int, ts: int) -> None: self.get_success( self.store.db_pool.simple_insert( "events", From e16ea87d0f8c4c30cad36f85488eb1f647e640b0 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 17 Jun 2022 14:56:46 +0100 Subject: [PATCH 021/178] Fix inconsistencies in event validation for `m.room.create` events (#13087) * Extend the auth rule checks for `m.room.create` events ... and move them up to the top of the function. Since the no auth_events are allowed for m.room.create events, we may as well get the m.room.create event checks out of the way first. 
* Add a test for create events with prev_events --- changelog.d/13087.bugfix | 1 + synapse/event_auth.py | 67 ++++++++++++++++++++++++++-------------- tests/test_event_auth.py | 45 +++++++++++++++++++++++++-- 3 files changed, 88 insertions(+), 25 deletions(-) create mode 100644 changelog.d/13087.bugfix diff --git a/changelog.d/13087.bugfix b/changelog.d/13087.bugfix new file mode 100644 index 0000000000..7c69801afe --- /dev/null +++ b/changelog.d/13087.bugfix @@ -0,0 +1 @@ +Fix some inconsistencies in the event authentication code. diff --git a/synapse/event_auth.py b/synapse/event_auth.py index 360a50cc71..440b1ae418 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -141,6 +141,15 @@ async def check_state_independent_auth_rules( Raises: AuthError if the checks fail """ + # Implementation of https://spec.matrix.org/v1.2/rooms/v9/#authorization-rules + + # 1. If type is m.room.create: + if event.type == EventTypes.Create: + _check_create(event) + + # 1.5 Otherwise, allow + return + # Check the auth events. auth_events = await store.get_events( event.auth_event_ids(), @@ -180,29 +189,6 @@ async def check_state_independent_auth_rules( auth_dict[(auth_event.type, auth_event.state_key)] = auth_event_id - # Implementation of https://matrix.org/docs/spec/rooms/v1#authorization-rules - # - # 1. If type is m.room.create: - if event.type == EventTypes.Create: - # 1b. If the domain of the room_id does not match the domain of the sender, - # reject. - sender_domain = get_domain_from_id(event.sender) - room_id_domain = get_domain_from_id(event.room_id) - if room_id_domain != sender_domain: - raise AuthError( - 403, "Creation event's room_id domain does not match sender's" - ) - - # 1c. If content.room_version is present and is not a recognised version, reject - room_version_prop = event.content.get("room_version", "1") - if room_version_prop not in KNOWN_ROOM_VERSIONS: - raise AuthError( - 403, - "room appears to have unsupported version %s" % (room_version_prop,), - ) - - return - # 3. If event does not have a m.room.create in its auth_events, reject. creation_event = auth_dict.get((EventTypes.Create, ""), None) if not creation_event: @@ -324,6 +310,41 @@ def _check_size_limits(event: "EventBase") -> None: raise EventSizeError("event too large") +def _check_create(event: "EventBase") -> None: + """Implementation of the auth rules for m.room.create events + + Args: + event: The `m.room.create` event to be checked + + Raises: + AuthError if the event does not pass the auth rules + """ + assert event.type == EventTypes.Create + + # 1.1 If it has any previous events, reject. + if event.prev_event_ids(): + raise AuthError(403, "Create event has prev events") + + # 1.2 If the domain of the room_id does not match the domain of the sender, + # reject. + sender_domain = get_domain_from_id(event.sender) + room_id_domain = get_domain_from_id(event.room_id) + if room_id_domain != sender_domain: + raise AuthError(403, "Creation event's room_id domain does not match sender's") + + # 1.3 If content.room_version is present and is not a recognised version, reject + room_version_prop = event.content.get("room_version", "1") + if room_version_prop not in KNOWN_ROOM_VERSIONS: + raise AuthError( + 403, + "room appears to have unsupported version %s" % (room_version_prop,), + ) + + # 1.4 If content has no creator field, reject. 
+ if EventContentFields.ROOM_CREATOR not in event.content: + raise AuthError(403, "Create event lacks a 'creator' property") + + def _can_federate(event: "EventBase", auth_events: StateMap["EventBase"]) -> bool: creation_event = auth_events.get((EventTypes.Create, "")) # There should always be a creation event, but if not don't federate. diff --git a/tests/test_event_auth.py b/tests/test_event_auth.py index e8e458cfd3..ed7a3cbcee 100644 --- a/tests/test_event_auth.py +++ b/tests/test_event_auth.py @@ -109,6 +109,47 @@ class EventAuthTestCase(unittest.TestCase): ) ) + def test_create_event_with_prev_events(self): + """A create event with prev_events should be rejected + + https://spec.matrix.org/v1.3/rooms/v9/#authorization-rules + 1: If type is m.room.create: + 1. If it has any previous events, reject. + """ + creator = f"@creator:{TEST_DOMAIN}" + + # we make both a good event and a bad event, to check that we are rejecting + # the bad event for the reason we think we are. + good_event = make_event_from_dict( + { + "room_id": TEST_ROOM_ID, + "type": "m.room.create", + "state_key": "", + "sender": creator, + "content": { + "creator": creator, + "room_version": RoomVersions.V9.identifier, + }, + "auth_events": [], + "prev_events": [], + }, + room_version=RoomVersions.V9, + ) + bad_event = make_event_from_dict( + {**good_event.get_dict(), "prev_events": ["$fakeevent"]}, + room_version=RoomVersions.V9, + ) + + event_store = _StubEventSourceStore() + + get_awaitable_result( + event_auth.check_state_independent_auth_rules(event_store, good_event) + ) + with self.assertRaises(AuthError): + get_awaitable_result( + event_auth.check_state_independent_auth_rules(event_store, bad_event) + ) + def test_random_users_cannot_send_state_before_first_pl(self): """ Check that, before the first PL lands, the creator is the only user @@ -564,8 +605,8 @@ class EventAuthTestCase(unittest.TestCase): # helpers for making events - -TEST_ROOM_ID = "!test:room" +TEST_DOMAIN = "example.com" +TEST_ROOM_ID = f"!test_room:{TEST_DOMAIN}" def _create_event( From d4b1c0d800eaa83c4d56a9cf17881ad362b9194b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 17 Jun 2022 16:30:59 +0100 Subject: [PATCH 022/178] Fix inconsistencies in event validation (#13088) --- changelog.d/13088.bugfix | 1 + synapse/event_auth.py | 23 ++++++- tests/handlers/test_federation.py | 14 ++-- tests/handlers/test_federation_event.py | 1 - tests/test_event_auth.py | 86 +++++++++++++++++++++++++ 5 files changed, 118 insertions(+), 7 deletions(-) create mode 100644 changelog.d/13088.bugfix diff --git a/changelog.d/13088.bugfix b/changelog.d/13088.bugfix new file mode 100644 index 0000000000..7c69801afe --- /dev/null +++ b/changelog.d/13088.bugfix @@ -0,0 +1 @@ +Fix some inconsistencies in the event authentication code. diff --git a/synapse/event_auth.py b/synapse/event_auth.py index 440b1ae418..0fc2c4b27e 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -150,7 +150,7 @@ async def check_state_independent_auth_rules( # 1.5 Otherwise, allow return - # Check the auth events. + # 2. Reject if event has auth_events that: ... 
auth_events = await store.get_events( event.auth_event_ids(), redact_behaviour=EventRedactBehaviour.as_is, @@ -158,6 +158,7 @@ async def check_state_independent_auth_rules( ) room_id = event.room_id auth_dict: MutableStateMap[str] = {} + expected_auth_types = auth_types_for_event(event.room_version, event) for auth_event_id in event.auth_event_ids(): auth_event = auth_events.get(auth_event_id) @@ -179,6 +180,24 @@ async def check_state_independent_auth_rules( % (event.event_id, room_id, auth_event_id, auth_event.room_id), ) + k = (auth_event.type, auth_event.state_key) + + # 2.1 ... have duplicate entries for a given type and state_key pair + if k in auth_dict: + raise AuthError( + 403, + f"Event {event.event_id} has duplicate auth_events for {k}: {auth_dict[k]} and {auth_event_id}", + ) + + # 2.2 ... have entries whose type and state_key don’t match those specified by + # the auth events selection algorithm described in the server + # specification. + if k not in expected_auth_types: + raise AuthError( + 403, + f"Event {event.event_id} has unexpected auth_event for {k}: {auth_event_id}", + ) + # We also need to check that the auth event itself is not rejected. if auth_event.rejected_reason: raise AuthError( @@ -187,7 +206,7 @@ async def check_state_independent_auth_rules( % (event.event_id, auth_event.event_id), ) - auth_dict[(auth_event.type, auth_event.state_key)] = auth_event_id + auth_dict[k] = auth_event_id # 3. If event does not have a m.room.create in its auth_events, reject. creation_event = auth_dict.get((EventTypes.Create, ""), None) diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index 9afba7b0e8..9b9c11fab7 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -225,9 +225,10 @@ class FederationTestCase(unittest.FederatingHomeserverTestCase): # we need a user on the remote server to be a member, so that we can send # extremity-causing events. 
+ remote_server_user_id = f"@user:{self.OTHER_SERVER_NAME}" self.get_success( event_injection.inject_member_event( - self.hs, room_id, f"@user:{self.OTHER_SERVER_NAME}", "join" + self.hs, room_id, remote_server_user_id, "join" ) ) @@ -247,6 +248,12 @@ class FederationTestCase(unittest.FederatingHomeserverTestCase): # create more than is 5 which corresponds to the number of backward # extremities we slice off in `_maybe_backfill_inner` federation_event_handler = self.hs.get_federation_event_handler() + auth_events = [ + ev + for ev in current_state + if (ev.type, ev.state_key) + in {("m.room.create", ""), ("m.room.member", remote_server_user_id)} + ] for _ in range(0, 8): event = make_event_from_dict( self.add_hashes_and_signatures( @@ -258,15 +265,14 @@ class FederationTestCase(unittest.FederatingHomeserverTestCase): "body": "message connected to fake event", }, "room_id": room_id, - "sender": f"@user:{self.OTHER_SERVER_NAME}", + "sender": remote_server_user_id, "prev_events": [ ev1.event_id, # We're creating an backward extremity each time thanks # to this fake event generate_fake_event_id(), ], - # lazy: *everything* is an auth event - "auth_events": [ev.event_id for ev in current_state], + "auth_events": [ev.event_id for ev in auth_events], "depth": ev1.depth + 1, }, room_version, diff --git a/tests/handlers/test_federation_event.py b/tests/handlers/test_federation_event.py index 1a36c25c41..4b1a8f04db 100644 --- a/tests/handlers/test_federation_event.py +++ b/tests/handlers/test_federation_event.py @@ -98,7 +98,6 @@ class FederationEventHandlerTests(unittest.FederatingHomeserverTestCase): auth_event_ids = [ initial_state_map[("m.room.create", "")], initial_state_map[("m.room.power_levels", "")], - initial_state_map[("m.room.join_rules", "")], member_event.event_id, ] diff --git a/tests/test_event_auth.py b/tests/test_event_auth.py index ed7a3cbcee..371cd201af 100644 --- a/tests/test_event_auth.py +++ b/tests/test_event_auth.py @@ -150,6 +150,92 @@ class EventAuthTestCase(unittest.TestCase): event_auth.check_state_independent_auth_rules(event_store, bad_event) ) + def test_duplicate_auth_events(self): + """Events with duplicate auth_events should be rejected + + https://spec.matrix.org/v1.3/rooms/v9/#authorization-rules + 2. Reject if event has auth_events that: + 1. 
have duplicate entries for a given type and state_key pair + """ + creator = "@creator:example.com" + + create_event = _create_event(RoomVersions.V9, creator) + join_event1 = _join_event(RoomVersions.V9, creator) + pl_event = _power_levels_event( + RoomVersions.V9, + creator, + {"state_default": 30, "users": {"creator": 100}}, + ) + + # create a second join event, so that we can make a duplicate + join_event2 = _join_event(RoomVersions.V9, creator) + + event_store = _StubEventSourceStore() + event_store.add_events([create_event, join_event1, join_event2, pl_event]) + + good_event = _random_state_event( + RoomVersions.V9, creator, [create_event, join_event2, pl_event] + ) + bad_event = _random_state_event( + RoomVersions.V9, creator, [create_event, join_event1, join_event2, pl_event] + ) + # a variation: two instances of the *same* event + bad_event2 = _random_state_event( + RoomVersions.V9, creator, [create_event, join_event2, join_event2, pl_event] + ) + + get_awaitable_result( + event_auth.check_state_independent_auth_rules(event_store, good_event) + ) + with self.assertRaises(AuthError): + get_awaitable_result( + event_auth.check_state_independent_auth_rules(event_store, bad_event) + ) + with self.assertRaises(AuthError): + get_awaitable_result( + event_auth.check_state_independent_auth_rules(event_store, bad_event2) + ) + + def test_unexpected_auth_events(self): + """Events with excess auth_events should be rejected + + https://spec.matrix.org/v1.3/rooms/v9/#authorization-rules + 2. Reject if event has auth_events that: + 2. have entries whose type and state_key don’t match those specified by the + auth events selection algorithm described in the server specification. + """ + creator = "@creator:example.com" + + create_event = _create_event(RoomVersions.V9, creator) + join_event = _join_event(RoomVersions.V9, creator) + pl_event = _power_levels_event( + RoomVersions.V9, + creator, + {"state_default": 30, "users": {"creator": 100}}, + ) + join_rules_event = _join_rules_event(RoomVersions.V9, creator, "public") + + event_store = _StubEventSourceStore() + event_store.add_events([create_event, join_event, pl_event, join_rules_event]) + + good_event = _random_state_event( + RoomVersions.V9, creator, [create_event, join_event, pl_event] + ) + # join rules should *not* be included in the auth events. + bad_event = _random_state_event( + RoomVersions.V9, + creator, + [create_event, join_event, pl_event, join_rules_event], + ) + + get_awaitable_result( + event_auth.check_state_independent_auth_rules(event_store, good_event) + ) + with self.assertRaises(AuthError): + get_awaitable_result( + event_auth.check_state_independent_auth_rules(event_store, bad_event) + ) + def test_random_users_cannot_send_state_before_first_pl(self): """ Check that, before the first PL lands, the creator is the only user From 3d94d07db39bf29f9742c95e19b52b8ffcf6baa7 Mon Sep 17 00:00:00 2001 From: Shay Date: Fri, 17 Jun 2022 10:47:38 -0700 Subject: [PATCH 023/178] Update opentracing docs to reference the configuration manual rather than the configuation file. (#13076) --- changelog.d/13076.doc | 1 + docs/opentracing.md | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13076.doc diff --git a/changelog.d/13076.doc b/changelog.d/13076.doc new file mode 100644 index 0000000000..75dc4630ea --- /dev/null +++ b/changelog.d/13076.doc @@ -0,0 +1 @@ +Update OpenTracing docs to reference the configuration manual rather than the configuration file. 
diff --git a/docs/opentracing.md b/docs/opentracing.md index f91362f112..abb94b565f 100644 --- a/docs/opentracing.md +++ b/docs/opentracing.md @@ -57,8 +57,9 @@ https://www.jaegertracing.io/docs/latest/getting-started. ## Enable OpenTracing in Synapse OpenTracing is not enabled by default. It must be enabled in the -homeserver config by uncommenting the config options under `opentracing` -as shown in the [sample config](./sample_config.yaml). For example: +homeserver config by adding the `opentracing` option to your config file. You can find +documentation about how to do this in the [config manual under the header 'Opentracing'](usage/configuration/config_documentation.md#opentracing). +See below for an example Opentracing configuration: ```yaml opentracing: From f33356e8f86f5271376467febfad0936e4e8a72d Mon Sep 17 00:00:00 2001 From: David Robertson Date: Fri, 17 Jun 2022 19:07:04 +0100 Subject: [PATCH 024/178] Use caret (semver bounds) for matrix.org packages (#13082) --- .ci/scripts/test_old_deps.sh | 6 ++++-- changelog.d/13082.misc | 1 + poetry.lock | 2 +- pyproject.toml | 6 +++--- 4 files changed, 9 insertions(+), 6 deletions(-) create mode 100644 changelog.d/13082.misc diff --git a/.ci/scripts/test_old_deps.sh b/.ci/scripts/test_old_deps.sh index 769ca4517e..7d0625fa86 100755 --- a/.ci/scripts/test_old_deps.sh +++ b/.ci/scripts/test_old_deps.sh @@ -27,9 +27,10 @@ export VIRTUALENV_NO_DOWNLOAD=1 # Patch the project definitions in-place: # - Replace all lower and tilde bounds with exact bounds -# - Make the pyopenssl 17.0, which is the oldest version that works with -# a `cryptography` compiled against OpenSSL 1.1. +# - Replace all caret bounds---but not the one that defines the supported Python version! # - Delete all lines referring to psycopg2 --- so no testing of postgres support. +# - Use pyopenssl 17.0, which is the oldest version that works with +# a `cryptography` compiled against OpenSSL 1.1. # - Omit systemd: we're not logging to journal here. # TODO: also replace caret bounds, see https://python-poetry.org/docs/dependency-specification/#version-constraints @@ -40,6 +41,7 @@ export VIRTUALENV_NO_DOWNLOAD=1 sed -i \ -e "s/[~>]=/==/g" \ + -e '/^python = "^/!s/\^/==/g' \ -e "/psycopg2/d" \ -e 's/pyOpenSSL = "==16.0.0"/pyOpenSSL = "==17.0.0"/' \ -e '/systemd/d' \ diff --git a/changelog.d/13082.misc b/changelog.d/13082.misc new file mode 100644 index 0000000000..1aa386dbf7 --- /dev/null +++ b/changelog.d/13082.misc @@ -0,0 +1 @@ +Pin dependencies maintained by matrix.org to [semantic version](https://semver.org/) bounds. diff --git a/poetry.lock b/poetry.lock index 849e8a7a99..49fbaab577 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1563,7 +1563,7 @@ url_preview = ["lxml"] [metadata] lock-version = "1.1" python-versions = "^3.7.1" -content-hash = "73882e279e0379482f2fc7414cb71addfd408ca48ad508ff8a02b0cb544762af" +content-hash = "e96625923122e29b6ea5964379828e321b6cede2b020fc32c6f86c09d86d1ae8" [metadata.files] attrs = [ diff --git a/pyproject.toml b/pyproject.toml index 44aa775c33..3a56c42c0b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,9 +110,9 @@ jsonschema = ">=3.0.0" frozendict = ">=1,!=2.1.2" # We require 2.1.0 or higher for type hints. Previous guard was >= 1.1.0 unpaddedbase64 = ">=2.1.0" -canonicaljson = ">=1.4.0" +canonicaljson = "^1.4.0" # we use the type definitions added in signedjson 1.1. -signedjson = ">=1.1.0" +signedjson = "^1.1.0" # validating SSL certs for IP addresses requires service_identity 18.1. 
service-identity = ">=18.1.0" # Twisted 18.9 introduces some logger improvements that the structured @@ -150,7 +150,7 @@ typing-extensions = ">=3.10.0.1" cryptography = ">=3.4.7" # ijson 3.1.4 fixes a bug with "." in property names ijson = ">=3.1.4" -matrix-common = "~=1.2.1" +matrix-common = "^1.2.1" # We need packaging.requirements.Requirement, added in 16.1. packaging = ">=16.1" # At the time of writing, we only use functions from the version `importlib.metadata` From d54909956ef616d976b3d9969be994df5b65030a Mon Sep 17 00:00:00 2001 From: santhoshivan23 <47689668+santhoshivan23@users.noreply.github.com> Date: Wed, 22 Jun 2022 20:02:18 +0530 Subject: [PATCH 025/178] validate room alias before interacting with the room directory (#13106) --- changelog.d/13106.bugfix | 1 + synapse/rest/client/directory.py | 6 ++++++ tests/rest/client/test_directory.py | 13 +++++++++++++ 3 files changed, 20 insertions(+) create mode 100644 changelog.d/13106.bugfix diff --git a/changelog.d/13106.bugfix b/changelog.d/13106.bugfix new file mode 100644 index 0000000000..0dc16bad08 --- /dev/null +++ b/changelog.d/13106.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where room directory requests would cause an internal server error if given a malformed room alias. \ No newline at end of file diff --git a/synapse/rest/client/directory.py b/synapse/rest/client/directory.py index 9639d4fe2c..d6c89cb162 100644 --- a/synapse/rest/client/directory.py +++ b/synapse/rest/client/directory.py @@ -46,6 +46,8 @@ class ClientDirectoryServer(RestServlet): self.auth = hs.get_auth() async def on_GET(self, request: Request, room_alias: str) -> Tuple[int, JsonDict]: + if not RoomAlias.is_valid(room_alias): + raise SynapseError(400, "Room alias invalid", errcode=Codes.INVALID_PARAM) room_alias_obj = RoomAlias.from_string(room_alias) res = await self.directory_handler.get_association(room_alias_obj) @@ -55,6 +57,8 @@ class ClientDirectoryServer(RestServlet): async def on_PUT( self, request: SynapseRequest, room_alias: str ) -> Tuple[int, JsonDict]: + if not RoomAlias.is_valid(room_alias): + raise SynapseError(400, "Room alias invalid", errcode=Codes.INVALID_PARAM) room_alias_obj = RoomAlias.from_string(room_alias) content = parse_json_object_from_request(request) @@ -89,6 +93,8 @@ class ClientDirectoryServer(RestServlet): async def on_DELETE( self, request: SynapseRequest, room_alias: str ) -> Tuple[int, JsonDict]: + if not RoomAlias.is_valid(room_alias): + raise SynapseError(400, "Room alias invalid", errcode=Codes.INVALID_PARAM) room_alias_obj = RoomAlias.from_string(room_alias) requester = await self.auth.get_user_by_req(request) diff --git a/tests/rest/client/test_directory.py b/tests/rest/client/test_directory.py index 67473a68d7..16e7ef41bc 100644 --- a/tests/rest/client/test_directory.py +++ b/tests/rest/client/test_directory.py @@ -215,6 +215,19 @@ class DirectoryTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.code, expected_code, channel.result) return alias + def test_invalid_alias(self) -> None: + alias = "#potato" + channel = self.make_request( + "GET", + f"/_matrix/client/r0/directory/room/{alias}", + access_token=self.user_tok, + ) + self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) + self.assertIn("error", channel.json_body, channel.json_body) + self.assertEqual( + channel.json_body["errcode"], "M_INVALID_PARAM", channel.json_body + ) + def random_alias(self, length: int) -> str: return RoomAlias(random_string(length), self.hs.hostname).to_string() From 
3ceaf1462d90281c31dc64d79fb35b0def30150a Mon Sep 17 00:00:00 2001 From: Aaron Raimist Date: Mon, 27 Jun 2022 10:15:25 +0000 Subject: [PATCH 026/178] Remove docs for Delete Group Admin API (#13112) This API no longer exists. Signed-off-by: Aaron Raimist --- changelog.d/13112.doc | 1 + docs/SUMMARY.md | 1 - docs/admin_api/delete_group.md | 14 -------------- 3 files changed, 1 insertion(+), 15 deletions(-) create mode 100644 changelog.d/13112.doc delete mode 100644 docs/admin_api/delete_group.md diff --git a/changelog.d/13112.doc b/changelog.d/13112.doc new file mode 100644 index 0000000000..4b99951c70 --- /dev/null +++ b/changelog.d/13112.doc @@ -0,0 +1 @@ +Remove documentation for the Delete Group Admin API which no longer exists. \ No newline at end of file diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index d7cf2df112..b51c7a3cb4 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -55,7 +55,6 @@ - [Admin API](usage/administration/admin_api/README.md) - [Account Validity](admin_api/account_validity.md) - [Background Updates](usage/administration/admin_api/background_updates.md) - - [Delete Group](admin_api/delete_group.md) - [Event Reports](admin_api/event_reports.md) - [Media](admin_api/media_admin_api.md) - [Purge History](admin_api/purge_history_api.md) diff --git a/docs/admin_api/delete_group.md b/docs/admin_api/delete_group.md deleted file mode 100644 index 73a96842ac..0000000000 --- a/docs/admin_api/delete_group.md +++ /dev/null @@ -1,14 +0,0 @@ -# Delete a local group - -This API lets a server admin delete a local group. Doing so will kick all -users out of the group so that their clients will correctly handle the group -being deleted. - -To use it, you will need to authenticate by providing an `access_token` -for a server admin: see [Admin API](../usage/administration/admin_api). - -The API is: - -``` -POST /_synapse/admin/v1/delete_group/ -``` From 3c5549e74ad37c07b8613729aa99117cbed81424 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Mon, 27 Jun 2022 11:43:20 +0100 Subject: [PATCH 027/178] Refactor the Dockerfile-workers configuration script to use Jinja2 templates in Synapse workers' Supervisord blocks. (#13054) Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- changelog.d/13054.misc | 1 + docker/conf-workers/supervisord.conf.j2 | 14 -------- .../conf-workers/synapse.supervisord.conf.j2 | 30 ++++++++++++++++ docker/configure_workers_and_start.py | 36 +++++++------------ 4 files changed, 43 insertions(+), 38 deletions(-) create mode 100644 changelog.d/13054.misc create mode 100644 docker/conf-workers/synapse.supervisord.conf.j2 diff --git a/changelog.d/13054.misc b/changelog.d/13054.misc new file mode 100644 index 0000000000..0880553739 --- /dev/null +++ b/changelog.d/13054.misc @@ -0,0 +1 @@ +Refactor the Dockerfile-workers configuration script to use Jinja2 templates in Synapse workers' Supervisord blocks. 
\ No newline at end of file diff --git a/docker/conf-workers/supervisord.conf.j2 b/docker/conf-workers/supervisord.conf.j2 index 7afab05133..086137494e 100644 --- a/docker/conf-workers/supervisord.conf.j2 +++ b/docker/conf-workers/supervisord.conf.j2 @@ -31,17 +31,3 @@ autorestart=true # Redis can be disabled if the image is being used without workers autostart={{ enable_redis }} -[program:synapse_main] -command=/usr/local/bin/prefix-log /usr/local/bin/python -m synapse.app.homeserver --config-path="{{ main_config_path }}" --config-path=/conf/workers/shared.yaml -priority=10 -# Log startup failures to supervisord's stdout/err -# Regular synapse logs will still go in the configured data directory -stdout_logfile=/dev/stdout -stdout_logfile_maxbytes=0 -stderr_logfile=/dev/stderr -stderr_logfile_maxbytes=0 -autorestart=unexpected -exitcodes=0 - -# Additional process blocks -{{ worker_config }} diff --git a/docker/conf-workers/synapse.supervisord.conf.j2 b/docker/conf-workers/synapse.supervisord.conf.j2 new file mode 100644 index 0000000000..6443450491 --- /dev/null +++ b/docker/conf-workers/synapse.supervisord.conf.j2 @@ -0,0 +1,30 @@ +[program:synapse_main] +command=/usr/local/bin/prefix-log /usr/local/bin/python -m synapse.app.homeserver + --config-path="{{ main_config_path }}" + --config-path=/conf/workers/shared.yaml +priority=10 +# Log startup failures to supervisord's stdout/err +# Regular synapse logs will still go in the configured data directory +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 +autorestart=unexpected +exitcodes=0 + + +{% for worker in workers %} +[program:synapse_{{ worker.name }}] +command=/usr/local/bin/prefix-log /usr/local/bin/python -m {{ worker.app }} + --config-path="{{ main_config_path }}" + --config-path=/conf/workers/shared.yaml + --config-path=/conf/workers/{{ worker.name }}.yaml +autorestart=unexpected +priority=500 +exitcodes=0 +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 + +{% endfor %} diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index 2a2c13f77a..2134b648d5 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -176,21 +176,6 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = { } # Templates for sections that may be inserted multiple times in config files -SUPERVISORD_PROCESS_CONFIG_BLOCK = """ -[program:synapse_{name}] -command=/usr/local/bin/prefix-log /usr/local/bin/python -m {app} \ - --config-path="{config_path}" \ - --config-path=/conf/workers/shared.yaml \ - --config-path=/conf/workers/{name}.yaml -autorestart=unexpected -priority=500 -exitcodes=0 -stdout_logfile=/dev/stdout -stdout_logfile_maxbytes=0 -stderr_logfile=/dev/stderr -stderr_logfile_maxbytes=0 -""" - NGINX_LOCATION_CONFIG_BLOCK = """ location ~* {endpoint} {{ proxy_pass {upstream}; @@ -353,13 +338,10 @@ def generate_worker_files( # This config file will be passed to all workers, included Synapse's main process. shared_config: Dict[str, Any] = {"listeners": listeners} - # The supervisord config. The contents of which will be inserted into the - # base supervisord jinja2 template. - # - # Supervisord will be in charge of running everything, from redis to nginx to Synapse - # and all of its worker processes. Load the config template, which defines a few - # services that are necessary to run. - supervisord_config = "" + # List of dicts that describe workers. 
+ # We pass this to the Supervisor template later to generate the appropriate + # program blocks. + worker_descriptors: List[Dict[str, Any]] = [] # Upstreams for load-balancing purposes. This dict takes the form of a worker type to the # ports of each worker. For example: @@ -437,7 +419,7 @@ def generate_worker_files( ) # Enable the worker in supervisord - supervisord_config += SUPERVISORD_PROCESS_CONFIG_BLOCK.format_map(worker_config) + worker_descriptors.append(worker_config) # Add nginx location blocks for this worker's endpoints (if any are defined) for pattern in worker_config["endpoint_patterns"]: @@ -535,10 +517,16 @@ def generate_worker_files( "/conf/supervisord.conf.j2", "/etc/supervisor/supervisord.conf", main_config_path=config_path, - worker_config=supervisord_config, enable_redis=workers_in_use, ) + convert( + "/conf/synapse.supervisord.conf.j2", + "/etc/supervisor/conf.d/synapse.conf", + workers=worker_descriptors, + main_config_path=config_path, + ) + # healthcheck config convert( "/conf/healthcheck.sh.j2", From 9b683ea80f94de4249264cbf375523b987900c89 Mon Sep 17 00:00:00 2001 From: Robert Long Date: Mon, 27 Jun 2022 06:44:05 -0700 Subject: [PATCH 028/178] Add Cross-Origin-Resource-Policy header to thumbnail and download media endpoints (#12944) --- changelog.d/12944.misc | 1 + synapse/http/server.py | 11 +++++++++++ synapse/rest/media/v1/download_resource.py | 7 ++++++- synapse/rest/media/v1/thumbnail_resource.py | 7 ++++++- tests/rest/media/v1/test_media_storage.py | 20 ++++++++++++++++++++ 5 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 changelog.d/12944.misc diff --git a/changelog.d/12944.misc b/changelog.d/12944.misc new file mode 100644 index 0000000000..bf27fe7e2c --- /dev/null +++ b/changelog.d/12944.misc @@ -0,0 +1 @@ +Add `Cross-Origin-Resource-Policy: cross-origin` header to content repository's thumbnail and download endpoints. \ No newline at end of file diff --git a/synapse/http/server.py b/synapse/http/server.py index e3dcc3f3dd..cf2d6f904b 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -928,6 +928,17 @@ def set_cors_headers(request: Request) -> None: ) +def set_corp_headers(request: Request) -> None: + """Set the CORP headers so that javascript running in a web browsers can + embed the resource returned from this request when their client requires + the `Cross-Origin-Embedder-Policy: require-corp` header. + + Args: + request: The http request to add the CORP header to. + """ + request.setHeader(b"Cross-Origin-Resource-Policy", b"cross-origin") + + def respond_with_html(request: Request, code: int, html: str) -> None: """ Wraps `respond_with_html_bytes` by first encoding HTML from a str to UTF-8 bytes. 
diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index 6180fa575e..048a042692 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -15,7 +15,11 @@ import logging from typing import TYPE_CHECKING -from synapse.http.server import DirectServeJsonResource, set_cors_headers +from synapse.http.server import ( + DirectServeJsonResource, + set_corp_headers, + set_cors_headers, +) from synapse.http.servlet import parse_boolean from synapse.http.site import SynapseRequest @@ -38,6 +42,7 @@ class DownloadResource(DirectServeJsonResource): async def _async_render_GET(self, request: SynapseRequest) -> None: set_cors_headers(request) + set_corp_headers(request) request.setHeader( b"Content-Security-Policy", b"sandbox;" diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 53b1565243..2295adfaa7 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -18,7 +18,11 @@ import logging from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple from synapse.api.errors import SynapseError -from synapse.http.server import DirectServeJsonResource, set_cors_headers +from synapse.http.server import ( + DirectServeJsonResource, + set_corp_headers, + set_cors_headers, +) from synapse.http.servlet import parse_integer, parse_string from synapse.http.site import SynapseRequest from synapse.rest.media.v1.media_storage import MediaStorage @@ -58,6 +62,7 @@ class ThumbnailResource(DirectServeJsonResource): async def _async_render_GET(self, request: SynapseRequest) -> None: set_cors_headers(request) + set_corp_headers(request) server_name, media_id, _ = parse_media_id(request) width = parse_integer(request, "width", required=True) height = parse_integer(request, "height", required=True) diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index 7204b2dfe0..1c67e1ca91 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -481,6 +481,12 @@ class MediaRepoTests(unittest.HomeserverTestCase): if expected_found: self.assertEqual(channel.code, 200) + + self.assertEqual( + channel.headers.getRawHeaders(b"Cross-Origin-Resource-Policy"), + [b"cross-origin"], + ) + if expected_body is not None: self.assertEqual( channel.result["body"], expected_body, channel.result["body"] @@ -549,6 +555,20 @@ class MediaRepoTests(unittest.HomeserverTestCase): [b"noindex, nofollow, noarchive, noimageindex"], ) + def test_cross_origin_resource_policy_header(self) -> None: + """ + Test that the Cross-Origin-Resource-Policy header is set to "cross-origin" + allowing web clients to embed media from the downloads API. 
+ """ + channel = self._req(b"inline; filename=out" + self.test_image.extension) + + headers = channel.headers + + self.assertEqual( + headers.getRawHeaders(b"Cross-Origin-Resource-Policy"), + [b"cross-origin"], + ) + class TestSpamChecker: """A spam checker module that rejects all media that includes the bytes From 1017f09c18b2ae6e350df1e7755ae480fd180853 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0imon=20Brandner?= Date: Mon, 27 Jun 2022 21:28:34 +0200 Subject: [PATCH 029/178] Update MSC3786 implementation: Check the `state_key` (#12939) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Šimon Brandner --- changelog.d/12939.bugfix | 1 + synapse/push/baserules.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 changelog.d/12939.bugfix diff --git a/changelog.d/12939.bugfix b/changelog.d/12939.bugfix new file mode 100644 index 0000000000..d9061cf8e5 --- /dev/null +++ b/changelog.d/12939.bugfix @@ -0,0 +1 @@ +Update [MSC3786](https://github.com/matrix-org/matrix-spec-proposals/pull/3786) implementation to check `state_key`. diff --git a/synapse/push/baserules.py b/synapse/push/baserules.py index 819bc9e9b6..6c0cc5a6ce 100644 --- a/synapse/push/baserules.py +++ b/synapse/push/baserules.py @@ -290,7 +290,13 @@ BASE_APPEND_OVERRIDE_RULES: List[Dict[str, Any]] = [ "key": "type", "pattern": "m.room.server_acl", "_cache_key": "_room_server_acl", - } + }, + { + "kind": "event_match", + "key": "state_key", + "pattern": "", + "_cache_key": "_room_server_acl_state_key", + }, ], "actions": [], }, From 6b99a66fe0260682fa95a0b19d3bee19c1e48876 Mon Sep 17 00:00:00 2001 From: santhoshivan23 <47689668+santhoshivan23@users.noreply.github.com> Date: Tue, 28 Jun 2022 16:52:59 +0530 Subject: [PATCH 030/178] Remove unspecced DELETE endpoint that modifies room visibility (#13123) --- changelog.d/13123.removal | 1 + synapse/rest/client/directory.py | 11 ----------- 2 files changed, 1 insertion(+), 11 deletions(-) create mode 100644 changelog.d/13123.removal diff --git a/changelog.d/13123.removal b/changelog.d/13123.removal new file mode 100644 index 0000000000..f013f16163 --- /dev/null +++ b/changelog.d/13123.removal @@ -0,0 +1 @@ +Remove the unspecced `DELETE /directory/list/room/{roomId}` endpoint, which hid rooms from the [public room directory](https://spec.matrix.org/v1.3/client-server-api/#listing-rooms). Instead, `PUT` to the same URL with a visibility of `"private"`. 
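
As a rough illustration, hiding a room from the public directory after this change is a `PUT` of the visibility rather than a `DELETE`; in the sketch below the homeserver URL, room ID and access token are placeholders, and the standard client API prefix is assumed:

```python
# Sketch: replace the removed DELETE with a PUT of {"visibility": "private"}
# to the same directory/list/room/{roomId} path. All values are placeholders.
from urllib.parse import quote

import requests

room_id = "!someroom:example.com"
resp = requests.put(
    "https://matrix.example.com/_matrix/client/v3/directory/list/room/" + quote(room_id),
    headers={"Authorization": "Bearer <access_token>"},
    json={"visibility": "private"},
)
print(resp.status_code)  # 200 on success
```
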
\ No newline at end of file diff --git a/synapse/rest/client/directory.py b/synapse/rest/client/directory.py index d6c89cb162..bc1b18c92d 100644 --- a/synapse/rest/client/directory.py +++ b/synapse/rest/client/directory.py @@ -151,17 +151,6 @@ class ClientDirectoryListServer(RestServlet): return 200, {} - async def on_DELETE( - self, request: SynapseRequest, room_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - - await self.directory_handler.edit_published_room_list( - requester, room_id, "private" - ) - - return 200, {} - class ClientAppserviceDirectoryListServer(RestServlet): PATTERNS = client_patterns( From f1145563f662653e451525032b043d1a58998b6d Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 28 Jun 2022 14:12:17 +0200 Subject: [PATCH 031/178] Extra type annotations in `test_server` (#13124) --- changelog.d/13124.misc | 1 + mypy.ini | 3 ++ tests/test_server.py | 81 +++++++++++++++++++++++------------------- 3 files changed, 48 insertions(+), 37 deletions(-) create mode 100644 changelog.d/13124.misc diff --git a/changelog.d/13124.misc b/changelog.d/13124.misc new file mode 100644 index 0000000000..513078f8d6 --- /dev/null +++ b/changelog.d/13124.misc @@ -0,0 +1 @@ +Add type annotations to `tests.test_server`. diff --git a/mypy.ini b/mypy.ini index c5130feaec..4b08f45c6d 100644 --- a/mypy.ini +++ b/mypy.ini @@ -113,6 +113,9 @@ disallow_untyped_defs = False [mypy-tests.handlers.test_user_directory] disallow_untyped_defs = True +[mypy-tests.test_server] +disallow_untyped_defs = True + [mypy-tests.state.test_profile] disallow_untyped_defs = True diff --git a/tests/test_server.py b/tests/test_server.py index 847432f791..fc4bce899c 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -14,7 +14,7 @@ import re from http import HTTPStatus -from typing import Tuple +from typing import Awaitable, Callable, Dict, NoReturn, Optional, Tuple from twisted.internet.defer import Deferred from twisted.web.resource import Resource @@ -36,6 +36,7 @@ from synapse.util import Clock from tests import unittest from tests.http.server._base import test_disconnect from tests.server import ( + FakeChannel, FakeSite, ThreadedMemoryReactorClock, make_request, @@ -44,7 +45,7 @@ from tests.server import ( class JsonResourceTests(unittest.TestCase): - def setUp(self): + def setUp(self) -> None: self.reactor = ThreadedMemoryReactorClock() self.hs_clock = Clock(self.reactor) self.homeserver = setup_test_homeserver( @@ -54,7 +55,7 @@ class JsonResourceTests(unittest.TestCase): reactor=self.reactor, ) - def test_handler_for_request(self): + def test_handler_for_request(self) -> None: """ JsonResource.handler_for_request gives correctly decoded URL args to the callback, while Twisted will give the raw bytes of URL query @@ -62,7 +63,9 @@ class JsonResourceTests(unittest.TestCase): """ got_kwargs = {} - def _callback(request, **kwargs): + def _callback( + request: SynapseRequest, **kwargs: object + ) -> Tuple[int, Dict[str, object]]: got_kwargs.update(kwargs) return 200, kwargs @@ -83,13 +86,13 @@ class JsonResourceTests(unittest.TestCase): self.assertEqual(got_kwargs, {"room_id": "\N{SNOWMAN}"}) - def test_callback_direct_exception(self): + def test_callback_direct_exception(self) -> None: """ If the web callback raises an uncaught exception, it will be translated into a 500. 
""" - def _callback(request, **kwargs): + def _callback(request: SynapseRequest, **kwargs: object) -> NoReturn: raise Exception("boo") res = JsonResource(self.homeserver) @@ -103,17 +106,17 @@ class JsonResourceTests(unittest.TestCase): self.assertEqual(channel.result["code"], b"500") - def test_callback_indirect_exception(self): + def test_callback_indirect_exception(self) -> None: """ If the web callback raises an uncaught exception in a Deferred, it will be translated into a 500. """ - def _throw(*args): + def _throw(*args: object) -> NoReturn: raise Exception("boo") - def _callback(request, **kwargs): - d = Deferred() + def _callback(request: SynapseRequest, **kwargs: object) -> "Deferred[None]": + d: "Deferred[None]" = Deferred() d.addCallback(_throw) self.reactor.callLater(0.5, d.callback, True) return make_deferred_yieldable(d) @@ -129,13 +132,13 @@ class JsonResourceTests(unittest.TestCase): self.assertEqual(channel.result["code"], b"500") - def test_callback_synapseerror(self): + def test_callback_synapseerror(self) -> None: """ If the web callback raises a SynapseError, it returns the appropriate status code and message set in it. """ - def _callback(request, **kwargs): + def _callback(request: SynapseRequest, **kwargs: object) -> NoReturn: raise SynapseError(403, "Forbidden!!one!", Codes.FORBIDDEN) res = JsonResource(self.homeserver) @@ -151,12 +154,12 @@ class JsonResourceTests(unittest.TestCase): self.assertEqual(channel.json_body["error"], "Forbidden!!one!") self.assertEqual(channel.json_body["errcode"], "M_FORBIDDEN") - def test_no_handler(self): + def test_no_handler(self) -> None: """ If there is no handler to process the request, Synapse will return 400. """ - def _callback(request, **kwargs): + def _callback(request: SynapseRequest, **kwargs: object) -> None: """ Not ever actually called! """ @@ -175,14 +178,16 @@ class JsonResourceTests(unittest.TestCase): self.assertEqual(channel.json_body["error"], "Unrecognized request") self.assertEqual(channel.json_body["errcode"], "M_UNRECOGNIZED") - def test_head_request(self): + def test_head_request(self) -> None: """ JsonResource.handler_for_request gives correctly decoded URL args to the callback, while Twisted will give the raw bytes of URL query arguments. """ - def _callback(request, **kwargs): + def _callback( + request: SynapseRequest, **kwargs: object + ) -> Tuple[int, Dict[str, object]]: return 200, {"result": True} res = JsonResource(self.homeserver) @@ -203,20 +208,21 @@ class JsonResourceTests(unittest.TestCase): class OptionsResourceTests(unittest.TestCase): - def setUp(self): + def setUp(self) -> None: self.reactor = ThreadedMemoryReactorClock() class DummyResource(Resource): isLeaf = True - def render(self, request): - return request.path + def render(self, request: SynapseRequest) -> bytes: + # Type-ignore: mypy thinks request.path is Optional[Any], not bytes. + return request.path # type: ignore[return-value] # Setup a resource with some children. self.resource = OptionsResource() self.resource.putChild(b"res", DummyResource()) - def _make_request(self, method, path): + def _make_request(self, method: bytes, path: bytes) -> FakeChannel: """Create a request from the method/path and return a channel with the response.""" # Create a site and query for the resource. 
site = SynapseSite( @@ -233,7 +239,7 @@ class OptionsResourceTests(unittest.TestCase): channel = make_request(self.reactor, site, method, path, shorthand=False) return channel - def test_unknown_options_request(self): + def test_unknown_options_request(self) -> None: """An OPTIONS requests to an unknown URL still returns 204 No Content.""" channel = self._make_request(b"OPTIONS", b"/foo/") self.assertEqual(channel.result["code"], b"204") @@ -253,7 +259,7 @@ class OptionsResourceTests(unittest.TestCase): "has CORS Headers header", ) - def test_known_options_request(self): + def test_known_options_request(self) -> None: """An OPTIONS requests to an known URL still returns 204 No Content.""" channel = self._make_request(b"OPTIONS", b"/res/") self.assertEqual(channel.result["code"], b"204") @@ -273,12 +279,12 @@ class OptionsResourceTests(unittest.TestCase): "has CORS Headers header", ) - def test_unknown_request(self): + def test_unknown_request(self) -> None: """A non-OPTIONS request to an unknown URL should 404.""" channel = self._make_request(b"GET", b"/foo/") self.assertEqual(channel.result["code"], b"404") - def test_known_request(self): + def test_known_request(self) -> None: """A non-OPTIONS request to an known URL should query the proper resource.""" channel = self._make_request(b"GET", b"/res/") self.assertEqual(channel.result["code"], b"200") @@ -287,16 +293,17 @@ class OptionsResourceTests(unittest.TestCase): class WrapHtmlRequestHandlerTests(unittest.TestCase): class TestResource(DirectServeHtmlResource): - callback = None + callback: Optional[Callable[..., Awaitable[None]]] - async def _async_render_GET(self, request): + async def _async_render_GET(self, request: SynapseRequest) -> None: + assert self.callback is not None await self.callback(request) - def setUp(self): + def setUp(self) -> None: self.reactor = ThreadedMemoryReactorClock() - def test_good_response(self): - async def callback(request): + def test_good_response(self) -> None: + async def callback(request: SynapseRequest) -> None: request.write(b"response") request.finish() @@ -311,13 +318,13 @@ class WrapHtmlRequestHandlerTests(unittest.TestCase): body = channel.result["body"] self.assertEqual(body, b"response") - def test_redirect_exception(self): + def test_redirect_exception(self) -> None: """ If the callback raises a RedirectException, it is turned into a 30x with the right location. 
""" - async def callback(request, **kwargs): + async def callback(request: SynapseRequest, **kwargs: object) -> None: raise RedirectException(b"/look/an/eagle", 301) res = WrapHtmlRequestHandlerTests.TestResource() @@ -332,13 +339,13 @@ class WrapHtmlRequestHandlerTests(unittest.TestCase): location_headers = [v for k, v in headers if k == b"Location"] self.assertEqual(location_headers, [b"/look/an/eagle"]) - def test_redirect_exception_with_cookie(self): + def test_redirect_exception_with_cookie(self) -> None: """ If the callback raises a RedirectException which sets a cookie, that is returned too """ - async def callback(request, **kwargs): + async def callback(request: SynapseRequest, **kwargs: object) -> NoReturn: e = RedirectException(b"/no/over/there", 304) e.cookies.append(b"session=yespls") raise e @@ -357,10 +364,10 @@ class WrapHtmlRequestHandlerTests(unittest.TestCase): cookies_headers = [v for k, v in headers if k == b"Set-Cookie"] self.assertEqual(cookies_headers, [b"session=yespls"]) - def test_head_request(self): + def test_head_request(self) -> None: """A head request should work by being turned into a GET request.""" - async def callback(request): + async def callback(request: SynapseRequest) -> None: request.write(b"response") request.finish() @@ -410,7 +417,7 @@ class CancellableDirectServeHtmlResource(DirectServeHtmlResource): class DirectServeJsonResourceCancellationTests(unittest.TestCase): """Tests for `DirectServeJsonResource` cancellation.""" - def setUp(self): + def setUp(self) -> None: self.reactor = ThreadedMemoryReactorClock() self.clock = Clock(self.reactor) self.resource = CancellableDirectServeJsonResource(self.clock) @@ -444,7 +451,7 @@ class DirectServeJsonResourceCancellationTests(unittest.TestCase): class DirectServeHtmlResourceCancellationTests(unittest.TestCase): """Tests for `DirectServeHtmlResource` cancellation.""" - def setUp(self): + def setUp(self) -> None: self.reactor = ThreadedMemoryReactorClock() self.clock = Clock(self.reactor) self.resource = CancellableDirectServeHtmlResource(self.clock) From 7469824d5838577f5a07aec6ab73b457459d8b4a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 Jun 2022 13:13:44 +0100 Subject: [PATCH 032/178] Fix serialization errors when rotating notifications (#13118) --- changelog.d/13118.misc | 1 + .../databases/main/event_push_actions.py | 201 ++++++++++++------ synapse/storage/databases/main/receipts.py | 13 +- .../delta/72/01event_push_summary_receipt.sql | 35 +++ tests/storage/test_event_push_actions.py | 35 ++- 5 files changed, 202 insertions(+), 83 deletions(-) create mode 100644 changelog.d/13118.misc create mode 100644 synapse/storage/schema/main/delta/72/01event_push_summary_receipt.sql diff --git a/changelog.d/13118.misc b/changelog.d/13118.misc new file mode 100644 index 0000000000..3bb51962e7 --- /dev/null +++ b/changelog.d/13118.misc @@ -0,0 +1 @@ +Reduce DB usage of `/sync` when a large number of unread messages have recently been sent in a room. diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 10a7962382..80ca2fd0b6 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -233,14 +233,30 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas counts = NotifCounts() - # First we pull the counts from the summary table + # First we pull the counts from the summary table. 
+ # + # We check that `last_receipt_stream_ordering` matches the stream + # ordering given. If it doesn't match then a new read receipt has arrived and + # we haven't yet updated the counts in `event_push_summary` to reflect + # that; in that case we simply ignore `event_push_summary` counts + # and do a manual count of all of the rows in the `event_push_actions` table + # for this user/room. + # + # If `last_receipt_stream_ordering` is null then that means it's up to + # date (as the row was written by an older version of Synapse that + # updated `event_push_summary` synchronously when persisting a new read + # receipt). txn.execute( """ SELECT stream_ordering, notif_count, COALESCE(unread_count, 0) FROM event_push_summary - WHERE room_id = ? AND user_id = ? AND stream_ordering > ? + WHERE room_id = ? AND user_id = ? + AND ( + (last_receipt_stream_ordering IS NULL AND stream_ordering > ?) + OR last_receipt_stream_ordering = ? + ) """, - (room_id, user_id, stream_ordering), + (room_id, user_id, stream_ordering, stream_ordering), ) row = txn.fetchone() @@ -263,9 +279,9 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas if row: counts.highlight_count += row[0] - # Finally we need to count push actions that haven't been summarized - # yet. - # We only want to pull out push actions that we haven't summarized yet. + # Finally we need to count push actions that aren't included in the + # summary returned above, e.g. recent events that haven't been + # summarized yet, or the summary is empty due to a recent read receipt. stream_ordering = max(stream_ordering, summary_stream_ordering) notify_count, unread_count = self._get_notif_unread_count_for_user_room( txn, room_id, user_id, stream_ordering @@ -800,6 +816,19 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas self._doing_notif_rotation = True try: + # First we recalculate push summaries and delete stale push actions + # for rooms/users with new receipts. + while True: + logger.debug("Handling new receipts") + + caught_up = await self.db_pool.runInteraction( + "_handle_new_receipts_for_notifs_txn", + self._handle_new_receipts_for_notifs_txn, + ) + if caught_up: + break + + # Then we update the event push summaries for any new events while True: logger.info("Rotating notifications") @@ -810,10 +839,110 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas break await self.hs.get_clock().sleep(self._rotate_delay) + # Finally we clear out old event push actions. await self._remove_old_push_actions_that_have_rotated() finally: self._doing_notif_rotation = False + def _handle_new_receipts_for_notifs_txn(self, txn: LoggingTransaction) -> bool: + """Check for new read receipts and delete from event push actions. + + Any push actions which predate the user's most recent read receipt are + now redundant, so we can remove them from `event_push_actions` and + update `event_push_summary`. + """ + + limit = 100 + + min_stream_id = self.db_pool.simple_select_one_onecol_txn( + txn, + table="event_push_summary_last_receipt_stream_id", + keyvalues={}, + retcol="stream_id", + ) + + sql = """ + SELECT r.stream_id, r.room_id, r.user_id, e.stream_ordering + FROM receipts_linearized AS r + INNER JOIN events AS e USING (event_id) + WHERE r.stream_id > ? AND user_id LIKE ? + ORDER BY r.stream_id ASC + LIMIT ? + """ + + # We only want local users, so we add a dodgy filter to the above query + # and recheck it below. 
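
Restated outside SQL, the freshness condition this change applies to cached `event_push_summary` rows (see the comment on the counts query earlier in this patch) is roughly the following; this is an illustrative sketch, not Synapse code, and the names are invented:

```python
# Sketch of when a cached event_push_summary row can be trusted for counting:
# either it was computed for exactly this read receipt, or it predates the new
# last_receipt_stream_ordering column (NULL) and lies after the receipt.
from typing import Optional

def summary_row_is_usable(
    last_receipt_stream_ordering: Optional[int],
    summary_stream_ordering: int,
    receipt_stream_ordering: int,
) -> bool:
    if last_receipt_stream_ordering is None:
        return summary_stream_ordering > receipt_stream_ordering
    return last_receipt_stream_ordering == receipt_stream_ordering
```
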
+ user_filter = "%:" + self.hs.hostname + + txn.execute( + sql, + ( + min_stream_id, + user_filter, + limit, + ), + ) + rows = txn.fetchall() + + # For each new read receipt we delete push actions from before it and + # recalculate the summary. + for _, room_id, user_id, stream_ordering in rows: + # Only handle our own read receipts. + if not self.hs.is_mine_id(user_id): + continue + + txn.execute( + """ + DELETE FROM event_push_actions + WHERE room_id = ? + AND user_id = ? + AND stream_ordering <= ? + AND highlight = 0 + """, + (room_id, user_id, stream_ordering), + ) + + old_rotate_stream_ordering = self.db_pool.simple_select_one_onecol_txn( + txn, + table="event_push_summary_stream_ordering", + keyvalues={}, + retcol="stream_ordering", + ) + + notif_count, unread_count = self._get_notif_unread_count_for_user_room( + txn, room_id, user_id, stream_ordering, old_rotate_stream_ordering + ) + + self.db_pool.simple_upsert_txn( + txn, + table="event_push_summary", + keyvalues={"room_id": room_id, "user_id": user_id}, + values={ + "notif_count": notif_count, + "unread_count": unread_count, + "stream_ordering": old_rotate_stream_ordering, + "last_receipt_stream_ordering": stream_ordering, + }, + ) + + # We always update `event_push_summary_last_receipt_stream_id` to + # ensure that we don't rescan the same receipts for remote users. + # + # This requires repeatable read to be safe, as we need the + # `MAX(stream_id)` to not include any new rows that have been committed + # since the start of the transaction (since those rows won't have been + # returned by the query above). Alternatively we could query the max + # stream ID at the start of the transaction and bound everything by + # that. + txn.execute( + """ + UPDATE event_push_summary_last_receipt_stream_id + SET stream_id = (SELECT COALESCE(MAX(stream_id), 0) FROM receipts_linearized) + """ + ) + + return len(rows) < limit + def _rotate_notifs_txn(self, txn: LoggingTransaction) -> bool: """Archives older notifications into event_push_summary. Returns whether the archiving process has caught up or not. @@ -1033,66 +1162,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas if done: break - def _remove_old_push_actions_before_txn( - self, txn: LoggingTransaction, room_id: str, user_id: str, stream_ordering: int - ) -> None: - """ - Purges old push actions for a user and room before a given - stream_ordering. - - We however keep a months worth of highlighted notifications, so that - users can still get a list of recent highlights. - - Args: - txn: The transaction - room_id: Room ID to delete from - user_id: user ID to delete for - stream_ordering: The lowest stream ordering which will - not be deleted. - """ - txn.call_after( - self.get_unread_event_push_actions_by_room_for_user.invalidate, - (room_id, user_id), - ) - - # We need to join on the events table to get the received_ts for - # event_push_actions and sqlite won't let us use a join in a delete so - # we can't just delete where received_ts < x. Furthermore we can - # only identify event_push_actions by a tuple of room_id, event_id - # we we can't use a subquery. - # Instead, we look up the stream ordering for the last event in that - # room received before the threshold time and delete event_push_actions - # in the room with a stream_odering before that. - txn.execute( - "DELETE FROM event_push_actions " - " WHERE user_id = ? AND room_id = ? AND " - " stream_ordering <= ?" - " AND ((stream_ordering < ? 
AND highlight = 1) or highlight = 0)", - (user_id, room_id, stream_ordering, self.stream_ordering_month_ago), - ) - - old_rotate_stream_ordering = self.db_pool.simple_select_one_onecol_txn( - txn, - table="event_push_summary_stream_ordering", - keyvalues={}, - retcol="stream_ordering", - ) - - notif_count, unread_count = self._get_notif_unread_count_for_user_room( - txn, room_id, user_id, stream_ordering, old_rotate_stream_ordering - ) - - self.db_pool.simple_upsert_txn( - txn, - table="event_push_summary", - keyvalues={"room_id": room_id, "user_id": user_id}, - values={ - "notif_count": notif_count, - "unread_count": unread_count, - "stream_ordering": old_rotate_stream_ordering, - }, - ) - class EventPushActionsStore(EventPushActionsWorkerStore): EPA_HIGHLIGHT_INDEX = "epa_highlight_index" diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index bec6d60577..0090c9f225 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -26,7 +26,7 @@ from typing import ( cast, ) -from synapse.api.constants import EduTypes, ReceiptTypes +from synapse.api.constants import EduTypes from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker from synapse.replication.tcp.streams import ReceiptsStream from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause @@ -682,17 +682,6 @@ class ReceiptsWorkerStore(SQLBaseStore): lock=False, ) - # When updating a local users read receipt, remove any push actions - # which resulted from the receipt's event and all earlier events. - if ( - self.hs.is_mine_id(user_id) - and receipt_type in (ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE) - and stream_ordering is not None - ): - self._remove_old_push_actions_before_txn( # type: ignore[attr-defined] - txn, room_id=room_id, user_id=user_id, stream_ordering=stream_ordering - ) - return rx_ts def _graph_to_linear( diff --git a/synapse/storage/schema/main/delta/72/01event_push_summary_receipt.sql b/synapse/storage/schema/main/delta/72/01event_push_summary_receipt.sql new file mode 100644 index 0000000000..e45db61529 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/01event_push_summary_receipt.sql @@ -0,0 +1,35 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Add a column that records the position of the read receipt for the user at +-- the time we summarised the push actions. This is used to check if the counts +-- are up to date after a new read receipt has been sent. 
+-- +-- Null means that we can skip that check, as the row was written by an older +-- version of Synapse that updated `event_push_summary` synchronously when +-- persisting a new read receipt +ALTER TABLE event_push_summary ADD COLUMN last_receipt_stream_ordering BIGINT; + + +-- Tracks which new receipts we've handled +CREATE TABLE event_push_summary_last_receipt_stream_id ( + Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row. + stream_id BIGINT NOT NULL, + CHECK (Lock='X') +); + +INSERT INTO event_push_summary_last_receipt_stream_id (stream_id) + SELECT COALESCE(MAX(stream_id), 0) + FROM receipts_linearized; diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index 2ac5f6db5e..ef069a8110 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -55,7 +55,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): def test_count_aggregation(self) -> None: room_id = "!foo:example.com" - user_id = "@user1235:example.com" + user_id = "@user1235:test" last_read_stream_ordering = [0] @@ -81,11 +81,26 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): def _inject_actions(stream: int, action: list) -> None: event = Mock() event.room_id = room_id - event.event_id = "$test:example.com" + event.event_id = f"$test{stream}:example.com" event.internal_metadata.stream_ordering = stream event.internal_metadata.is_outlier.return_value = False event.depth = stream + self.get_success( + self.store.db_pool.simple_insert( + table="events", + values={ + "stream_ordering": stream, + "topological_ordering": stream, + "type": "m.room.message", + "room_id": room_id, + "processed": True, + "outlier": False, + "event_id": event.event_id, + }, + ) + ) + self.get_success( self.store.add_push_actions_to_staging( event.event_id, @@ -105,18 +120,28 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): def _rotate(stream: int) -> None: self.get_success( self.store.db_pool.runInteraction( - "", self.store._rotate_notifs_before_txn, stream + "rotate-receipts", self.store._handle_new_receipts_for_notifs_txn + ) + ) + + self.get_success( + self.store.db_pool.runInteraction( + "rotate-notifs", self.store._rotate_notifs_before_txn, stream ) ) def _mark_read(stream: int, depth: int) -> None: last_read_stream_ordering[0] = stream + self.get_success( self.store.db_pool.runInteraction( "", - self.store._remove_old_push_actions_before_txn, + self.store._insert_linearized_receipt_txn, room_id, + "m.read", user_id, + f"$test{stream}:example.com", + {}, stream, ) ) @@ -150,7 +175,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): _assert_counts(1, 0) - _mark_read(7, 7) + _mark_read(6, 6) _assert_counts(0, 0) _inject_actions(8, HIGHLIGHT) From fa1308061802ac7b7d20e954ba7372c5ac292333 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Tue, 28 Jun 2022 14:29:08 +0100 Subject: [PATCH 033/178] Merge pull request from GHSA-22p3-qrh9-cx32 * Make _iterate_over_text easier to read by using simple data structures * Prefer a set of tags to ignore In my tests, it's 4x faster to check for containment in a set of this size * Add a stack size limit to _iterate_over_text * Continue accepting the case where there is no body element * Use an early return instead for None Co-authored-by: Richard van der Hoff --- synapse/rest/media/v1/preview_html.py | 63 +++++++++++++++--------- tests/rest/media/v1/test_html_preview.py | 17 +++++++ 2 files changed, 56 insertions(+), 24 deletions(-) diff --git 
a/synapse/rest/media/v1/preview_html.py b/synapse/rest/media/v1/preview_html.py index ed8f21a483..5f334f4634 100644 --- a/synapse/rest/media/v1/preview_html.py +++ b/synapse/rest/media/v1/preview_html.py @@ -12,10 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. import codecs -import itertools import logging import re -from typing import TYPE_CHECKING, Dict, Generator, Iterable, Optional, Set, Union +from typing import TYPE_CHECKING, Dict, Generator, Iterable, List, Optional, Set, Union if TYPE_CHECKING: from lxml import etree @@ -276,7 +275,7 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]: from lxml import etree - TAGS_TO_REMOVE = ( + TAGS_TO_REMOVE = { "header", "nav", "aside", @@ -291,31 +290,42 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]: "img", "picture", etree.Comment, - ) + } # Split all the text nodes into paragraphs (by splitting on new # lines) text_nodes = ( re.sub(r"\s+", "\n", el).strip() - for el in _iterate_over_text(tree.find("body"), *TAGS_TO_REMOVE) + for el in _iterate_over_text(tree.find("body"), TAGS_TO_REMOVE) ) return summarize_paragraphs(text_nodes) def _iterate_over_text( - tree: "etree.Element", *tags_to_ignore: Union[str, "etree.Comment"] + tree: Optional["etree.Element"], + tags_to_ignore: Set[Union[str, "etree.Comment"]], + stack_limit: int = 1024, ) -> Generator[str, None, None]: """Iterate over the tree returning text nodes in a depth first fashion, skipping text nodes inside certain tags. + + Args: + tree: The parent element to iterate. Can be None if there isn't one. + tags_to_ignore: Set of tags to ignore + stack_limit: Maximum stack size limit for depth-first traversal. + Nodes will be dropped if this limit is hit, which may truncate the + textual result. + Intended to limit the maximum working memory when generating a preview. """ - # This is basically a stack that we extend using itertools.chain. - # This will either consist of an element to iterate over *or* a string + + if tree is None: + return + + # This is a stack whose items are elements to iterate over *or* strings # to be returned. - elements = iter([tree]) - while True: - el = next(elements, None) - if el is None: - return + elements: List[Union[str, "etree.Element"]] = [tree] + while elements: + el = elements.pop() if isinstance(el, str): yield el @@ -329,17 +339,22 @@ def _iterate_over_text( if el.text: yield el.text - # We add to the stack all the elements children, interspersed with - # each child's tail text (if it exists). The tail text of a node - # is text that comes *after* the node, so we always include it even - # if we ignore the child node. - elements = itertools.chain( - itertools.chain.from_iterable( # Basically a flatmap - [child, child.tail] if child.tail else [child] - for child in el.iterchildren() - ), - elements, - ) + # We add to the stack all the element's children, interspersed with + # each child's tail text (if it exists). + # + # We iterate in reverse order so that earlier pieces of text appear + # closer to the top of the stack. + for child in el.iterchildren(reversed=True): + if len(elements) > stack_limit: + # We've hit our limit for working memory + break + + if child.tail: + # The tail text of a node is text that comes *after* the node, + # so we always include it even if we ignore the child node. 
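
The traversal being built here is a bounded, explicit-stack depth-first walk: children (and their tail text) are pushed in reverse order, and pushes stop once the stack limit is hit. A standalone sketch of the same pattern, with plain nested lists standing in for lxml elements (illustrative only, not the Synapse implementation), is:

```python
# Sketch: depth-first text extraction with an explicit stack and a size cap.
# Children are pushed in reverse so text is yielded in document order; once
# the stack exceeds the limit, remaining children are dropped (truncating the
# extracted text) to bound working memory.
from typing import Iterator, List, Union

Node = Union[str, List["Node"]]

def iterate_text(root: Node, stack_limit: int = 1024) -> Iterator[str]:
    stack: List[Node] = [root]
    while stack:
        item = stack.pop()
        if isinstance(item, str):
            yield item
            continue
        for child in reversed(item):
            if len(stack) > stack_limit:
                break
            stack.append(child)

print(list(iterate_text(["a", ["b", "c"], "d"])))  # ['a', 'b', 'c', 'd']
```
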
+ elements.append(child.tail) + + elements.append(child) def summarize_paragraphs( diff --git a/tests/rest/media/v1/test_html_preview.py b/tests/rest/media/v1/test_html_preview.py index ea9e5889bf..61357622bd 100644 --- a/tests/rest/media/v1/test_html_preview.py +++ b/tests/rest/media/v1/test_html_preview.py @@ -370,6 +370,23 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase): og = parse_html_to_open_graph(tree) self.assertEqual(og, {"og:title": "ó", "og:description": "Some text."}) + def test_nested_nodes(self) -> None: + """A body with some nested nodes. Tests that we iterate over children + in the right order (and don't reverse the order of the text).""" + html = b""" + Welcome the bold and underlined text + with a cheeky SVG and some tail text + """ + tree = decode_body(html, "http://example.com/test.html") + og = parse_html_to_open_graph(tree) + self.assertEqual( + og, + { + "og:title": None, + "og:description": "Welcome\n\nthe bold\n\nand underlined text\n\nand\n\nsome\n\ntail text", + }, + ) + class MediaEncodingTestCase(unittest.TestCase): def test_meta_charset(self) -> None: From ea10cdbea703f94a84a484377485de8dc14a963a Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 28 Jun 2022 14:33:56 +0100 Subject: [PATCH 034/178] 1.61.1 --- CHANGES.md | 21 +++++++++++++++++++++ debian/changelog | 6 ++++++ pyproject.toml | 2 +- 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index bd9b34dd7a..b97f014142 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,24 @@ +Synapse 1.61.1 (2022-06-28) +=========================== + +This patch release fixes a security issue regarding URL previews, affecting all prior versions of Synapse. Server administrators are encouraged to update Synapse as soon as possible. We are not aware of these vulnerabilities being exploited in the wild. + +Server administrators who are unable to update Synapse may use the workarounds described in the linked GitHub Security Advisory below. + +## Security advisory + +The following issue is fixed in 1.61.1. + +* [GHSA-22p3-qrh9-cx32](https://github.com/matrix-org/synapse/security/advisories/GHSA-22p3-qrh9-cx32) / [CVE-2022-31052](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-31052) + + Synapse instances with the [`url_preview_enabled`](https://matrix-org.github.io/synapse/v1.61/usage/configuration/config_documentation.html#media-store) homeserver config option set to `true` are affected. URL previews of some web pages can lead to unbounded recursion, causing the request to either fail, or in some cases crash the running Synapse process. + + Requesting URL previews requires authentication. Nevertheless, it is possible to exploit this maliciously, either by malicious users on the homeserver, or by remote users sending URLs that a local user's client may automatically request a URL preview for. + + Homeservers with the `url_preview_enabled` configuration option set to `false` (the default) are unaffected. Instances with the `enable_media_repo` configuration option set to `false` are also unaffected, as this also disables URL preview functionality. + + Fixed by fa1308061802ac7b7d20e954ba7372c5ac292333. + Synapse 1.61.0 (2022-06-14) =========================== diff --git a/debian/changelog b/debian/changelog index 753a03065a..2ca565a157 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.61.1) stable; urgency=medium + + * New Synapse release 1.61.1. 
+ + -- Synapse Packaging team Tue, 28 Jun 2022 14:33:46 +0100 + matrix-synapse-py3 (1.61.0) stable; urgency=medium * New Synapse release 1.61.0. diff --git a/pyproject.toml b/pyproject.toml index 8b21bdc837..7d33c08f73 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ skip_gitignore = true [tool.poetry] name = "matrix-synapse" -version = "1.61.0" +version = "1.61.1" description = "Homeserver for the Matrix decentralised comms protocol" authors = ["Matrix.org Team and Contributors "] license = "Apache-2.0" From 09d89ddc1f875bb1ea835a7614980787d4ebd043 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 28 Jun 2022 14:41:06 +0100 Subject: [PATCH 035/178] Linkify GHSA commit --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index b97f014142..0db01d4096 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -17,7 +17,7 @@ The following issue is fixed in 1.61.1. Homeservers with the `url_preview_enabled` configuration option set to `false` (the default) are unaffected. Instances with the `enable_media_repo` configuration option set to `false` are also unaffected, as this also disables URL preview functionality. - Fixed by fa1308061802ac7b7d20e954ba7372c5ac292333. + Fixed by [fa1308061802ac7b7d20e954ba7372c5ac292333](https://github.com/matrix-org/synapse/commit/fa1308061802ac7b7d20e954ba7372c5ac292333). Synapse 1.61.0 (2022-06-14) =========================== From b210146fd97c58c29ee4dacab2f964e7b9b33c46 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 28 Jun 2022 16:36:08 +0100 Subject: [PATCH 036/178] 1.62.0rc1 --- CHANGES.md | 95 +++++++++++++++++++++++++++++++++++++++ changelog.d/12674.misc | 1 - changelog.d/12737.doc | 1 - changelog.d/12738.misc | 1 - changelog.d/12857.feature | 1 - changelog.d/12881.misc | 1 - changelog.d/12893.misc | 1 - changelog.d/12929.misc | 1 - changelog.d/12939.bugfix | 1 - changelog.d/12941.misc | 1 - changelog.d/12944.misc | 1 - changelog.d/12954.misc | 1 - changelog.d/12957.misc | 1 - changelog.d/12963.misc | 1 - changelog.d/12965.misc | 1 - changelog.d/12969.misc | 1 - changelog.d/12970.misc | 1 - changelog.d/12973.bugfix | 1 - changelog.d/12979.bugfix | 1 - changelog.d/12982.misc | 1 - changelog.d/12984.misc | 1 - changelog.d/12985.misc | 1 - changelog.d/12986.misc | 1 - changelog.d/12990.misc | 1 - changelog.d/12991.bugfix | 2 - changelog.d/13004.misc | 1 - changelog.d/13005.misc | 1 - changelog.d/13011.misc | 1 - changelog.d/13013.misc | 1 - changelog.d/13017.misc | 1 - changelog.d/13018.bugfix | 1 - changelog.d/13021.misc | 1 - changelog.d/13022.doc | 1 - changelog.d/13023.doc | 1 - changelog.d/13025.misc | 1 - changelog.d/13034.misc | 1 - changelog.d/13035.feature | 1 - changelog.d/13036.feature | 1 - changelog.d/13041.bugfix | 2 - changelog.d/13042.misc | 1 - changelog.d/13045.feature | 1 - changelog.d/13046.misc | 1 - changelog.d/13047.feature | 1 - changelog.d/13048.misc | 1 - changelog.d/13050.misc | 1 - changelog.d/13052.misc | 1 - changelog.d/13054.misc | 1 - changelog.d/13055.misc | 1 - changelog.d/13056.feature | 1 - changelog.d/13057.misc | 1 - changelog.d/13058.misc | 1 - changelog.d/13060.misc | 1 - changelog.d/13061.misc | 1 - changelog.d/13062.misc | 1 - changelog.d/13063.misc | 1 - changelog.d/13065.misc | 1 - changelog.d/13069.misc | 1 - changelog.d/13070.misc | 1 - changelog.d/13071.misc | 1 - changelog.d/13073.doc | 1 - changelog.d/13074.misc | 1 - changelog.d/13075.misc | 1 - changelog.d/13076.doc | 1 - changelog.d/13082.misc | 1 - changelog.d/13085.misc | 1 - 
changelog.d/13087.bugfix | 1 - changelog.d/13088.bugfix | 1 - changelog.d/13089.misc | 1 - changelog.d/13093.misc | 1 - changelog.d/13095.doc | 1 - changelog.d/13096.misc | 1 - changelog.d/13098.feature | 1 - changelog.d/13099.misc | 1 - changelog.d/13106.bugfix | 1 - changelog.d/13112.doc | 1 - changelog.d/13118.misc | 1 - changelog.d/13123.removal | 1 - changelog.d/13124.misc | 1 - debian/changelog | 6 +++ pyproject.toml | 2 +- 80 files changed, 102 insertions(+), 80 deletions(-) delete mode 100644 changelog.d/12674.misc delete mode 100644 changelog.d/12737.doc delete mode 100644 changelog.d/12738.misc delete mode 100644 changelog.d/12857.feature delete mode 100644 changelog.d/12881.misc delete mode 100644 changelog.d/12893.misc delete mode 100644 changelog.d/12929.misc delete mode 100644 changelog.d/12939.bugfix delete mode 100644 changelog.d/12941.misc delete mode 100644 changelog.d/12944.misc delete mode 100644 changelog.d/12954.misc delete mode 100644 changelog.d/12957.misc delete mode 100644 changelog.d/12963.misc delete mode 100644 changelog.d/12965.misc delete mode 100644 changelog.d/12969.misc delete mode 100644 changelog.d/12970.misc delete mode 100644 changelog.d/12973.bugfix delete mode 100644 changelog.d/12979.bugfix delete mode 100644 changelog.d/12982.misc delete mode 100644 changelog.d/12984.misc delete mode 100644 changelog.d/12985.misc delete mode 100644 changelog.d/12986.misc delete mode 100644 changelog.d/12990.misc delete mode 100644 changelog.d/12991.bugfix delete mode 100644 changelog.d/13004.misc delete mode 100644 changelog.d/13005.misc delete mode 100644 changelog.d/13011.misc delete mode 100644 changelog.d/13013.misc delete mode 100644 changelog.d/13017.misc delete mode 100644 changelog.d/13018.bugfix delete mode 100644 changelog.d/13021.misc delete mode 100644 changelog.d/13022.doc delete mode 100644 changelog.d/13023.doc delete mode 100644 changelog.d/13025.misc delete mode 100644 changelog.d/13034.misc delete mode 100644 changelog.d/13035.feature delete mode 100644 changelog.d/13036.feature delete mode 100644 changelog.d/13041.bugfix delete mode 100644 changelog.d/13042.misc delete mode 100644 changelog.d/13045.feature delete mode 100644 changelog.d/13046.misc delete mode 100644 changelog.d/13047.feature delete mode 100644 changelog.d/13048.misc delete mode 100644 changelog.d/13050.misc delete mode 100644 changelog.d/13052.misc delete mode 100644 changelog.d/13054.misc delete mode 100644 changelog.d/13055.misc delete mode 100644 changelog.d/13056.feature delete mode 100644 changelog.d/13057.misc delete mode 100644 changelog.d/13058.misc delete mode 100644 changelog.d/13060.misc delete mode 100644 changelog.d/13061.misc delete mode 100644 changelog.d/13062.misc delete mode 100644 changelog.d/13063.misc delete mode 100644 changelog.d/13065.misc delete mode 100644 changelog.d/13069.misc delete mode 100644 changelog.d/13070.misc delete mode 100644 changelog.d/13071.misc delete mode 100644 changelog.d/13073.doc delete mode 100644 changelog.d/13074.misc delete mode 100644 changelog.d/13075.misc delete mode 100644 changelog.d/13076.doc delete mode 100644 changelog.d/13082.misc delete mode 100644 changelog.d/13085.misc delete mode 100644 changelog.d/13087.bugfix delete mode 100644 changelog.d/13088.bugfix delete mode 100644 changelog.d/13089.misc delete mode 100644 changelog.d/13093.misc delete mode 100644 changelog.d/13095.doc delete mode 100644 changelog.d/13096.misc delete mode 100644 changelog.d/13098.feature delete mode 100644 changelog.d/13099.misc delete mode 
100644 changelog.d/13106.bugfix delete mode 100644 changelog.d/13112.doc delete mode 100644 changelog.d/13118.misc delete mode 100644 changelog.d/13123.removal delete mode 100644 changelog.d/13124.misc diff --git a/CHANGES.md b/CHANGES.md index 0db01d4096..4c1decf8f4 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,98 @@ +Synapse 1.62.0rc1 (2022-06-28) +============================== + +Features +-------- + +- Port the spam-checker API callbacks to a new, richer API. This is part of an ongoing change to let spam-checker modules inform users of the reason their event or operation is rejected. ([\#12857](https://github.com/matrix-org/synapse/issues/12857), [\#13047](https://github.com/matrix-org/synapse/issues/13047)) +- Allow server admins to customise the response of the `/.well-known/matrix/client` endpoint. ([\#13035](https://github.com/matrix-org/synapse/issues/13035)) +- Add metrics measuring the CPU and DB time spent in state resolution. ([\#13036](https://github.com/matrix-org/synapse/issues/13036)) +- Speed up fetching of device list changes in `/sync` and `/keys/changes`. ([\#13045](https://github.com/matrix-org/synapse/issues/13045), [\#13098](https://github.com/matrix-org/synapse/issues/13098)) +- Improve URL previews for sites which only provide Twitter Card metadata, e.g. LWN.net. ([\#13056](https://github.com/matrix-org/synapse/issues/13056)) + + +Bugfixes +-------- + +- Update [MSC3786](https://github.com/matrix-org/matrix-spec-proposals/pull/3786) implementation to check `state_key`. ([\#12939](https://github.com/matrix-org/synapse/issues/12939)) +- Fix a bug introduced in Synapse 1.58 where Synapse would not report full version information when installed from a git checkout. This is a best-effort affair and not guaranteed to be stable. ([\#12973](https://github.com/matrix-org/synapse/issues/12973)) +- Fix a bug introduced in Synapse 1.60 where Synapse would fail to start if the `sqlite3` module was not available. ([\#12979](https://github.com/matrix-org/synapse/issues/12979)) +- Fix a bug where non-standard information was required when requesting the `/hierarchy` API over federation. Introduced + in Synapse v1.41.0. ([\#12991](https://github.com/matrix-org/synapse/issues/12991)) +- Fix a long-standing bug which meant that rate limiting was not restrictive enough in some cases. ([\#13018](https://github.com/matrix-org/synapse/issues/13018)) +- Fix a bug introduced in Synapse 1.58 where profile requests for a malformed user ID would ccause an internal error. Synapse now returns 400 Bad Request in this situation. ([\#13041](https://github.com/matrix-org/synapse/issues/13041)) +- Fix some inconsistencies in the event authentication code. ([\#13087](https://github.com/matrix-org/synapse/issues/13087), [\#13088](https://github.com/matrix-org/synapse/issues/13088)) +- Fix a long-standing bug where room directory requests would cause an internal server error if given a malformed room alias. ([\#13106](https://github.com/matrix-org/synapse/issues/13106)) + + +Improved Documentation +---------------------- + +- Add documentation for how to configure Synapse with Workers using Docker Compose. Includes example worker config and docker-compose.yaml. Contributed by @Thumbscrew. ([\#12737](https://github.com/matrix-org/synapse/issues/12737)) +- Ensure the [Poetry cheat sheet](https://matrix-org.github.io/synapse/develop/development/dependencies.html) is available in the online documentation. 
([\#13022](https://github.com/matrix-org/synapse/issues/13022)) +- Mention removed community/group worker endpoints in upgrade.md. Contributed by @olmari. ([\#13023](https://github.com/matrix-org/synapse/issues/13023)) +- Add instructions for running Complement with `gotestfmt`-formatted output locally. ([\#13073](https://github.com/matrix-org/synapse/issues/13073)) +- Update OpenTracing docs to reference the configuration manual rather than the configuration file. ([\#13076](https://github.com/matrix-org/synapse/issues/13076)) +- Update information on downstream Debian packages. ([\#13095](https://github.com/matrix-org/synapse/issues/13095)) +- Remove documentation for the Delete Group Admin API which no longer exists. ([\#13112](https://github.com/matrix-org/synapse/issues/13112)) + + +Deprecations and Removals +------------------------- + +- Remove the unspecced `DELETE /directory/list/room/{roomId}` endpoint, which hid rooms from the [public room directory](https://spec.matrix.org/v1.3/client-server-api/#listing-rooms). Instead, `PUT` to the same URL with a visibility of `"private"`. ([\#13123](https://github.com/matrix-org/synapse/issues/13123)) + + +Internal Changes +---------------- + +- Add tests for cancellation of `GET /rooms/$room_id/members` and `GET /rooms/$room_id/state` requests. ([\#12674](https://github.com/matrix-org/synapse/issues/12674)) +- Report login failures due to unknown third party identifiers in the same way as failures due to invalid passwords. This prevents an attacker from using the error response to determine if the identifier exists. Contributed by Daniel Aloni. ([\#12738](https://github.com/matrix-org/synapse/issues/12738)) +- Merge the Complement testing Docker images into a single, multi-purpose image. ([\#12881](https://github.com/matrix-org/synapse/issues/12881), [\#13075](https://github.com/matrix-org/synapse/issues/13075)) +- Simplify the database schema for `event_edges`. ([\#12893](https://github.com/matrix-org/synapse/issues/12893)) +- Clean up the test code for client disconnection. ([\#12929](https://github.com/matrix-org/synapse/issues/12929)) +- Remove code generating comments in configuration. ([\#12941](https://github.com/matrix-org/synapse/issues/12941)) +- Add `Cross-Origin-Resource-Policy: cross-origin` header to content repository's thumbnail and download endpoints. ([\#12944](https://github.com/matrix-org/synapse/issues/12944)) +- Replace noop background updates with `DELETE` delta. ([\#12954](https://github.com/matrix-org/synapse/issues/12954), [\#13050](https://github.com/matrix-org/synapse/issues/13050)) +- Use lower isolation level when inserting read receipts to avoid serialization errors. Contributed by Nick @ Beeper. ([\#12957](https://github.com/matrix-org/synapse/issues/12957)) +- Reduce the amount of state we pull from the DB. ([\#12963](https://github.com/matrix-org/synapse/issues/12963)) +- Enable testing against PostgreSQL databases in Complement CI. ([\#12965](https://github.com/matrix-org/synapse/issues/12965), [\#13034](https://github.com/matrix-org/synapse/issues/13034)) +- Fix an inaccurate comment. ([\#12969](https://github.com/matrix-org/synapse/issues/12969)) +- Remove the `delete_device` method and always call `delete_devices`. ([\#12970](https://github.com/matrix-org/synapse/issues/12970)) +- Use a GitHub form for issues rather than a hard-to-read, easy-to-ignore template. 
([\#12982](https://github.com/matrix-org/synapse/issues/12982)) +- Move [MSC3715](https://github.com/matrix-org/matrix-spec-proposals/pull/3715) behind an experimental config flag. ([\#12984](https://github.com/matrix-org/synapse/issues/12984)) +- Add type hints to tests. ([\#12985](https://github.com/matrix-org/synapse/issues/12985), [\#13099](https://github.com/matrix-org/synapse/issues/13099)) +- Refactor macaroon tokens generation and move the unsubscribe link in notification emails to `/_synapse/client/unsubscribe`. ([\#12986](https://github.com/matrix-org/synapse/issues/12986)) +- Fix documentation for running complement tests. ([\#12990](https://github.com/matrix-org/synapse/issues/12990)) +- Faster joins: add issue links to the TODO comments in the code. ([\#13004](https://github.com/matrix-org/synapse/issues/13004)) +- Reduce DB usage of `/sync` when a large number of unread messages have recently been sent in a room. ([\#13005](https://github.com/matrix-org/synapse/issues/13005), [\#13096](https://github.com/matrix-org/synapse/issues/13096), [\#13118](https://github.com/matrix-org/synapse/issues/13118)) +- Replaced usage of PyJWT with methods from Authlib in `org.matrix.login.jwt`. Contributed by Hannes Lerchl. ([\#13011](https://github.com/matrix-org/synapse/issues/13011)) +- Modernize the `contrib/graph/` scripts. ([\#13013](https://github.com/matrix-org/synapse/issues/13013)) +- Remove redundant `room_version` parameters from event auth functions. ([\#13017](https://github.com/matrix-org/synapse/issues/13017)) +- Decouple `synapse.api.auth_blocking.AuthBlocking` from `synapse.api.auth.Auth`. ([\#13021](https://github.com/matrix-org/synapse/issues/13021)) +- Add type annotations to `synapse.storage.databases.main.devices`. ([\#13025](https://github.com/matrix-org/synapse/issues/13025)) +- Set default `sync_response_cache_duration` to two minutes. ([\#13042](https://github.com/matrix-org/synapse/issues/13042)) +- Rename CI test runs. ([\#13046](https://github.com/matrix-org/synapse/issues/13046)) +- Increase timeout of complement CI test runs. ([\#13048](https://github.com/matrix-org/synapse/issues/13048)) +- Refactor entry points so that they all have a `main` function. ([\#13052](https://github.com/matrix-org/synapse/issues/13052)) +- Refactor the Dockerfile-workers configuration script to use Jinja2 templates in Synapse workers' Supervisord blocks. ([\#13054](https://github.com/matrix-org/synapse/issues/13054)) +- Add headers to individual options in config documentation to allow for linking. ([\#13055](https://github.com/matrix-org/synapse/issues/13055)) +- Make Complement CI logs easier to read. ([\#13057](https://github.com/matrix-org/synapse/issues/13057), [\#13058](https://github.com/matrix-org/synapse/issues/13058), [\#13069](https://github.com/matrix-org/synapse/issues/13069)) +- Don't instantiate modules with keyword arguments. ([\#13060](https://github.com/matrix-org/synapse/issues/13060)) +- Fix type checking errors against Twisted trunk. ([\#13061](https://github.com/matrix-org/synapse/issues/13061)) +- Allow MSC3030 `timestamp_to_event` calls from anyone on world-readable rooms. ([\#13062](https://github.com/matrix-org/synapse/issues/13062)) +- Add a CI job to check that schema deltas are in the correct folder. ([\#13063](https://github.com/matrix-org/synapse/issues/13063)) +- Avoid rechecking event auth rules which are independent of room state. 
([\#13065](https://github.com/matrix-org/synapse/issues/13065)) +- Reduce the duplication of code that invokes the rate limiter. ([\#13070](https://github.com/matrix-org/synapse/issues/13070)) +- Add a Subject Alternative Name to the certificate generated for Complement tests. ([\#13071](https://github.com/matrix-org/synapse/issues/13071)) +- Add more tests for room upgrades. ([\#13074](https://github.com/matrix-org/synapse/issues/13074)) +- Pin dependencies maintained by matrix.org to [semantic version](https://semver.org/) bounds. ([\#13082](https://github.com/matrix-org/synapse/issues/13082)) +- Correctly report prometheus DB stats for `get_earliest_token_for_stats`. ([\#13085](https://github.com/matrix-org/synapse/issues/13085)) +- Fix a long-standing bug where a finished logging context would be re-started when Synapse failed to persist an event from federation. ([\#13089](https://github.com/matrix-org/synapse/issues/13089)) +- Simplify the alias deletion logic as an application service. ([\#13093](https://github.com/matrix-org/synapse/issues/13093)) +- Add type annotations to `tests.test_server`. ([\#13124](https://github.com/matrix-org/synapse/issues/13124)) + + Synapse 1.61.1 (2022-06-28) =========================== diff --git a/changelog.d/12674.misc b/changelog.d/12674.misc deleted file mode 100644 index c8a8f32f0a..0000000000 --- a/changelog.d/12674.misc +++ /dev/null @@ -1 +0,0 @@ -Add tests for cancellation of `GET /rooms/$room_id/members` and `GET /rooms/$room_id/state` requests. diff --git a/changelog.d/12737.doc b/changelog.d/12737.doc deleted file mode 100644 index ab2d1f2fd9..0000000000 --- a/changelog.d/12737.doc +++ /dev/null @@ -1 +0,0 @@ -Add documentation for how to configure Synapse with Workers using Docker Compose. Includes example worker config and docker-compose.yaml. Contributed by @Thumbscrew. \ No newline at end of file diff --git a/changelog.d/12738.misc b/changelog.d/12738.misc deleted file mode 100644 index 8252223475..0000000000 --- a/changelog.d/12738.misc +++ /dev/null @@ -1 +0,0 @@ -Report login failures due to unknown third party identifiers in the same way as failures due to invalid passwords. This prevents an attacker from using the error response to determine if the identifier exists. Contributed by Daniel Aloni. \ No newline at end of file diff --git a/changelog.d/12857.feature b/changelog.d/12857.feature deleted file mode 100644 index ddd1dbe685..0000000000 --- a/changelog.d/12857.feature +++ /dev/null @@ -1 +0,0 @@ -Port spam-checker API callbacks to a new, richer API. This is part of an ongoing change to let spam-checker modules inform users of the reason their event or operation is rejected. diff --git a/changelog.d/12881.misc b/changelog.d/12881.misc deleted file mode 100644 index 8a83182bd4..0000000000 --- a/changelog.d/12881.misc +++ /dev/null @@ -1 +0,0 @@ -Merge the Complement testing Docker images into a single, multi-purpose image. \ No newline at end of file diff --git a/changelog.d/12893.misc b/changelog.d/12893.misc deleted file mode 100644 index 5705210303..0000000000 --- a/changelog.d/12893.misc +++ /dev/null @@ -1 +0,0 @@ -Simplify the database schema for `event_edges`. diff --git a/changelog.d/12929.misc b/changelog.d/12929.misc deleted file mode 100644 index 20718d258d..0000000000 --- a/changelog.d/12929.misc +++ /dev/null @@ -1 +0,0 @@ -Clean up the test code for client disconnection. 
diff --git a/changelog.d/12939.bugfix b/changelog.d/12939.bugfix deleted file mode 100644 index d9061cf8e5..0000000000 --- a/changelog.d/12939.bugfix +++ /dev/null @@ -1 +0,0 @@ -Update [MSC3786](https://github.com/matrix-org/matrix-spec-proposals/pull/3786) implementation to check `state_key`. diff --git a/changelog.d/12941.misc b/changelog.d/12941.misc deleted file mode 100644 index 6a74f255df..0000000000 --- a/changelog.d/12941.misc +++ /dev/null @@ -1 +0,0 @@ -Remove code generating comments in configuration. diff --git a/changelog.d/12944.misc b/changelog.d/12944.misc deleted file mode 100644 index bf27fe7e2c..0000000000 --- a/changelog.d/12944.misc +++ /dev/null @@ -1 +0,0 @@ -Add `Cross-Origin-Resource-Policy: cross-origin` header to content repository's thumbnail and download endpoints. \ No newline at end of file diff --git a/changelog.d/12954.misc b/changelog.d/12954.misc deleted file mode 100644 index 20bf136732..0000000000 --- a/changelog.d/12954.misc +++ /dev/null @@ -1 +0,0 @@ -Replace noop background updates with `DELETE` delta. diff --git a/changelog.d/12957.misc b/changelog.d/12957.misc deleted file mode 100644 index 0c075276ec..0000000000 --- a/changelog.d/12957.misc +++ /dev/null @@ -1 +0,0 @@ -Use lower isolation level when inserting read receipts to avoid serialization errors. Contributed by Nick @ Beeper. diff --git a/changelog.d/12963.misc b/changelog.d/12963.misc deleted file mode 100644 index d57e1aca6b..0000000000 --- a/changelog.d/12963.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce the amount of state we pull from the DB. diff --git a/changelog.d/12965.misc b/changelog.d/12965.misc deleted file mode 100644 index cc2823e12b..0000000000 --- a/changelog.d/12965.misc +++ /dev/null @@ -1 +0,0 @@ -Enable testing against PostgreSQL databases in Complement CI. \ No newline at end of file diff --git a/changelog.d/12969.misc b/changelog.d/12969.misc deleted file mode 100644 index 05de7ce839..0000000000 --- a/changelog.d/12969.misc +++ /dev/null @@ -1 +0,0 @@ -Fix an inaccurate comment. diff --git a/changelog.d/12970.misc b/changelog.d/12970.misc deleted file mode 100644 index 8f874aa07b..0000000000 --- a/changelog.d/12970.misc +++ /dev/null @@ -1 +0,0 @@ -Remove the `delete_device` method and always call `delete_devices`. diff --git a/changelog.d/12973.bugfix b/changelog.d/12973.bugfix deleted file mode 100644 index 1bf45854ff..0000000000 --- a/changelog.d/12973.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a bug introduced in Synapse 1.58 where Synapse would not report full version information when installed from a git checkout. This is a best-effort affair and not guaranteed to be stable. diff --git a/changelog.d/12979.bugfix b/changelog.d/12979.bugfix deleted file mode 100644 index 6b54408025..0000000000 --- a/changelog.d/12979.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a bug introduced in Synapse 1.60 where Synapse would fail to start if the `sqlite3` module was not available. diff --git a/changelog.d/12982.misc b/changelog.d/12982.misc deleted file mode 100644 index 036b69efe6..0000000000 --- a/changelog.d/12982.misc +++ /dev/null @@ -1 +0,0 @@ -Use a GitHub form for issues rather than a hard-to-read, easy-to-ignore template. \ No newline at end of file diff --git a/changelog.d/12984.misc b/changelog.d/12984.misc deleted file mode 100644 index a902017180..0000000000 --- a/changelog.d/12984.misc +++ /dev/null @@ -1 +0,0 @@ -Move [MSC3715](https://github.com/matrix-org/matrix-spec-proposals/pull/3715) behind an experimental config flag. 
diff --git a/changelog.d/12985.misc b/changelog.d/12985.misc deleted file mode 100644 index 7f6492d587..0000000000 --- a/changelog.d/12985.misc +++ /dev/null @@ -1 +0,0 @@ -Add type hints to tests. diff --git a/changelog.d/12986.misc b/changelog.d/12986.misc deleted file mode 100644 index 937b888023..0000000000 --- a/changelog.d/12986.misc +++ /dev/null @@ -1 +0,0 @@ -Refactor macaroon tokens generation and move the unsubscribe link in notification emails to `/_synapse/client/unsubscribe`. diff --git a/changelog.d/12990.misc b/changelog.d/12990.misc deleted file mode 100644 index c68f6a731e..0000000000 --- a/changelog.d/12990.misc +++ /dev/null @@ -1 +0,0 @@ -Fix documentation for running complement tests. diff --git a/changelog.d/12991.bugfix b/changelog.d/12991.bugfix deleted file mode 100644 index c6e388d5b9..0000000000 --- a/changelog.d/12991.bugfix +++ /dev/null @@ -1,2 +0,0 @@ -Fix a bug where non-standard information was required when requesting the `/hierarchy` API over federation. Introduced -in Synapse v1.41.0. diff --git a/changelog.d/13004.misc b/changelog.d/13004.misc deleted file mode 100644 index d8e93d87af..0000000000 --- a/changelog.d/13004.misc +++ /dev/null @@ -1 +0,0 @@ -Faster joins: add issue links to the TODO comments in the code. diff --git a/changelog.d/13005.misc b/changelog.d/13005.misc deleted file mode 100644 index 3bb51962e7..0000000000 --- a/changelog.d/13005.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce DB usage of `/sync` when a large number of unread messages have recently been sent in a room. diff --git a/changelog.d/13011.misc b/changelog.d/13011.misc deleted file mode 100644 index 4da223219f..0000000000 --- a/changelog.d/13011.misc +++ /dev/null @@ -1 +0,0 @@ -Replaced usage of PyJWT with methods from Authlib in `org.matrix.login.jwt`. Contributed by Hannes Lerchl. diff --git a/changelog.d/13013.misc b/changelog.d/13013.misc deleted file mode 100644 index 903c6a3c8a..0000000000 --- a/changelog.d/13013.misc +++ /dev/null @@ -1 +0,0 @@ -Modernize the `contrib/graph/` scripts. diff --git a/changelog.d/13017.misc b/changelog.d/13017.misc deleted file mode 100644 index b314687f9c..0000000000 --- a/changelog.d/13017.misc +++ /dev/null @@ -1 +0,0 @@ -Remove redundant `room_version` parameters from event auth functions. diff --git a/changelog.d/13018.bugfix b/changelog.d/13018.bugfix deleted file mode 100644 index a84657f04f..0000000000 --- a/changelog.d/13018.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a long-standing bug which meant that rate limiting was not restrictive enough in some cases. \ No newline at end of file diff --git a/changelog.d/13021.misc b/changelog.d/13021.misc deleted file mode 100644 index 84c41cdf59..0000000000 --- a/changelog.d/13021.misc +++ /dev/null @@ -1 +0,0 @@ -Decouple `synapse.api.auth_blocking.AuthBlocking` from `synapse.api.auth.Auth`. diff --git a/changelog.d/13022.doc b/changelog.d/13022.doc deleted file mode 100644 index 4d6ac7ae94..0000000000 --- a/changelog.d/13022.doc +++ /dev/null @@ -1 +0,0 @@ -Ensure the [Poetry cheat sheet](https://matrix-org.github.io/synapse/develop/development/dependencies.html) is available in the online documentation. diff --git a/changelog.d/13023.doc b/changelog.d/13023.doc deleted file mode 100644 index 5589c7492c..0000000000 --- a/changelog.d/13023.doc +++ /dev/null @@ -1 +0,0 @@ -Mention removed community/group worker endpoints in upgrade.md. Contributed by @olmari. 
\ No newline at end of file diff --git a/changelog.d/13025.misc b/changelog.d/13025.misc deleted file mode 100644 index 7cb0d174b7..0000000000 --- a/changelog.d/13025.misc +++ /dev/null @@ -1 +0,0 @@ -Add type annotations to `synapse.storage.databases.main.devices`. diff --git a/changelog.d/13034.misc b/changelog.d/13034.misc deleted file mode 100644 index cc2823e12b..0000000000 --- a/changelog.d/13034.misc +++ /dev/null @@ -1 +0,0 @@ -Enable testing against PostgreSQL databases in Complement CI. \ No newline at end of file diff --git a/changelog.d/13035.feature b/changelog.d/13035.feature deleted file mode 100644 index cfca3ab4b7..0000000000 --- a/changelog.d/13035.feature +++ /dev/null @@ -1 +0,0 @@ -Allow server admins to customise the response of the `/.well-known/matrix/client` endpoint. diff --git a/changelog.d/13036.feature b/changelog.d/13036.feature deleted file mode 100644 index 71e5a29fe9..0000000000 --- a/changelog.d/13036.feature +++ /dev/null @@ -1 +0,0 @@ -Add metrics measuring the CPU and DB time spent in state resolution. diff --git a/changelog.d/13041.bugfix b/changelog.d/13041.bugfix deleted file mode 100644 index edb1635eb9..0000000000 --- a/changelog.d/13041.bugfix +++ /dev/null @@ -1,2 +0,0 @@ -Fix a bug introduced in Synapse 1.58 where profile requests for a malformed user ID would ccause an internal error. Synapse now returns 400 Bad Request in this situation. - diff --git a/changelog.d/13042.misc b/changelog.d/13042.misc deleted file mode 100644 index 745d5fcf84..0000000000 --- a/changelog.d/13042.misc +++ /dev/null @@ -1 +0,0 @@ -Set default `sync_response_cache_duration` to two minutes. diff --git a/changelog.d/13045.feature b/changelog.d/13045.feature deleted file mode 100644 index 7b0667ba95..0000000000 --- a/changelog.d/13045.feature +++ /dev/null @@ -1 +0,0 @@ -Speed up fetching of device list changes in `/sync` and `/keys/changes`. diff --git a/changelog.d/13046.misc b/changelog.d/13046.misc deleted file mode 100644 index 1248c34d39..0000000000 --- a/changelog.d/13046.misc +++ /dev/null @@ -1 +0,0 @@ -Rename CI test runs. diff --git a/changelog.d/13047.feature b/changelog.d/13047.feature deleted file mode 100644 index ddd1dbe685..0000000000 --- a/changelog.d/13047.feature +++ /dev/null @@ -1 +0,0 @@ -Port spam-checker API callbacks to a new, richer API. This is part of an ongoing change to let spam-checker modules inform users of the reason their event or operation is rejected. diff --git a/changelog.d/13048.misc b/changelog.d/13048.misc deleted file mode 100644 index 073c8b1a96..0000000000 --- a/changelog.d/13048.misc +++ /dev/null @@ -1 +0,0 @@ -Increase timeout of complement CI test runs. diff --git a/changelog.d/13050.misc b/changelog.d/13050.misc deleted file mode 100644 index 20bf136732..0000000000 --- a/changelog.d/13050.misc +++ /dev/null @@ -1 +0,0 @@ -Replace noop background updates with `DELETE` delta. diff --git a/changelog.d/13052.misc b/changelog.d/13052.misc deleted file mode 100644 index 0d11dfb12a..0000000000 --- a/changelog.d/13052.misc +++ /dev/null @@ -1 +0,0 @@ -Refactor entry points so that they all have a `main` function. \ No newline at end of file diff --git a/changelog.d/13054.misc b/changelog.d/13054.misc deleted file mode 100644 index 0880553739..0000000000 --- a/changelog.d/13054.misc +++ /dev/null @@ -1 +0,0 @@ -Refactor the Dockerfile-workers configuration script to use Jinja2 templates in Synapse workers' Supervisord blocks. 
\ No newline at end of file diff --git a/changelog.d/13055.misc b/changelog.d/13055.misc deleted file mode 100644 index 92a02a6080..0000000000 --- a/changelog.d/13055.misc +++ /dev/null @@ -1 +0,0 @@ -Add headers to individual options in config documentation to allow for linking. diff --git a/changelog.d/13056.feature b/changelog.d/13056.feature deleted file mode 100644 index 219e2f6c1e..0000000000 --- a/changelog.d/13056.feature +++ /dev/null @@ -1 +0,0 @@ -Improve URL previews for sites which only provide Twitter Card metadata, e.g. LWN.net. diff --git a/changelog.d/13057.misc b/changelog.d/13057.misc deleted file mode 100644 index 4102bf96b5..0000000000 --- a/changelog.d/13057.misc +++ /dev/null @@ -1 +0,0 @@ -Make Complement CI logs easier to read. \ No newline at end of file diff --git a/changelog.d/13058.misc b/changelog.d/13058.misc deleted file mode 100644 index 4102bf96b5..0000000000 --- a/changelog.d/13058.misc +++ /dev/null @@ -1 +0,0 @@ -Make Complement CI logs easier to read. \ No newline at end of file diff --git a/changelog.d/13060.misc b/changelog.d/13060.misc deleted file mode 100644 index c2376701f4..0000000000 --- a/changelog.d/13060.misc +++ /dev/null @@ -1 +0,0 @@ -Don't instantiate modules with keyword arguments. diff --git a/changelog.d/13061.misc b/changelog.d/13061.misc deleted file mode 100644 index 4c55e2b4ed..0000000000 --- a/changelog.d/13061.misc +++ /dev/null @@ -1 +0,0 @@ -Fix type checking errors against Twisted trunk. diff --git a/changelog.d/13062.misc b/changelog.d/13062.misc deleted file mode 100644 index d425e9a9ac..0000000000 --- a/changelog.d/13062.misc +++ /dev/null @@ -1 +0,0 @@ -Allow MSC3030 'timestamp_to_event' calls from anyone on world-readable rooms. diff --git a/changelog.d/13063.misc b/changelog.d/13063.misc deleted file mode 100644 index 167d6d2cd5..0000000000 --- a/changelog.d/13063.misc +++ /dev/null @@ -1 +0,0 @@ -Add a CI job to check that schema deltas are in the correct folder. diff --git a/changelog.d/13065.misc b/changelog.d/13065.misc deleted file mode 100644 index e9e8a7659a..0000000000 --- a/changelog.d/13065.misc +++ /dev/null @@ -1 +0,0 @@ -Avoid rechecking event auth rules which are independent of room state. diff --git a/changelog.d/13069.misc b/changelog.d/13069.misc deleted file mode 100644 index 4102bf96b5..0000000000 --- a/changelog.d/13069.misc +++ /dev/null @@ -1 +0,0 @@ -Make Complement CI logs easier to read. \ No newline at end of file diff --git a/changelog.d/13070.misc b/changelog.d/13070.misc deleted file mode 100644 index ce1f14342d..0000000000 --- a/changelog.d/13070.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce the duplication of code that invokes the rate limiter. \ No newline at end of file diff --git a/changelog.d/13071.misc b/changelog.d/13071.misc deleted file mode 100644 index a6e1e6b3a8..0000000000 --- a/changelog.d/13071.misc +++ /dev/null @@ -1 +0,0 @@ -Add a Subject Alternative Name to the certificate generated for Complement tests. \ No newline at end of file diff --git a/changelog.d/13073.doc b/changelog.d/13073.doc deleted file mode 100644 index e162a8404e..0000000000 --- a/changelog.d/13073.doc +++ /dev/null @@ -1 +0,0 @@ -Add instructions for running Complement with `gotestfmt`-formatted output locally. \ No newline at end of file diff --git a/changelog.d/13074.misc b/changelog.d/13074.misc deleted file mode 100644 index a502e44d92..0000000000 --- a/changelog.d/13074.misc +++ /dev/null @@ -1 +0,0 @@ -Add more tests for room upgrades. 
diff --git a/changelog.d/13075.misc b/changelog.d/13075.misc deleted file mode 100644 index 2311629f7b..0000000000 --- a/changelog.d/13075.misc +++ /dev/null @@ -1 +0,0 @@ -Merge the Complement testing Docker images into a single, multi-purpose image. diff --git a/changelog.d/13076.doc b/changelog.d/13076.doc deleted file mode 100644 index 75dc4630ea..0000000000 --- a/changelog.d/13076.doc +++ /dev/null @@ -1 +0,0 @@ -Update OpenTracing docs to reference the configuration manual rather than the configuration file. diff --git a/changelog.d/13082.misc b/changelog.d/13082.misc deleted file mode 100644 index 1aa386dbf7..0000000000 --- a/changelog.d/13082.misc +++ /dev/null @@ -1 +0,0 @@ -Pin dependencies maintained by matrix.org to [semantic version](https://semver.org/) bounds. diff --git a/changelog.d/13085.misc b/changelog.d/13085.misc deleted file mode 100644 index 2401d4f388..0000000000 --- a/changelog.d/13085.misc +++ /dev/null @@ -1 +0,0 @@ -Correctly report prometheus DB stats for `get_earliest_token_for_stats`. diff --git a/changelog.d/13087.bugfix b/changelog.d/13087.bugfix deleted file mode 100644 index 7c69801afe..0000000000 --- a/changelog.d/13087.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix some inconsistencies in the event authentication code. diff --git a/changelog.d/13088.bugfix b/changelog.d/13088.bugfix deleted file mode 100644 index 7c69801afe..0000000000 --- a/changelog.d/13088.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix some inconsistencies in the event authentication code. diff --git a/changelog.d/13089.misc b/changelog.d/13089.misc deleted file mode 100644 index 5868507cb7..0000000000 --- a/changelog.d/13089.misc +++ /dev/null @@ -1 +0,0 @@ -Fix a long-standing bug where a finished logging context would be re-started when Synapse failed to persist an event from federation. diff --git a/changelog.d/13093.misc b/changelog.d/13093.misc deleted file mode 100644 index 2547c87fa4..0000000000 --- a/changelog.d/13093.misc +++ /dev/null @@ -1 +0,0 @@ -Simplify the alias deletion logic as an application service. diff --git a/changelog.d/13095.doc b/changelog.d/13095.doc deleted file mode 100644 index 4651f25e14..0000000000 --- a/changelog.d/13095.doc +++ /dev/null @@ -1 +0,0 @@ -Update information on downstream Debian packages. diff --git a/changelog.d/13096.misc b/changelog.d/13096.misc deleted file mode 100644 index 3bb51962e7..0000000000 --- a/changelog.d/13096.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce DB usage of `/sync` when a large number of unread messages have recently been sent in a room. diff --git a/changelog.d/13098.feature b/changelog.d/13098.feature deleted file mode 100644 index 7b0667ba95..0000000000 --- a/changelog.d/13098.feature +++ /dev/null @@ -1 +0,0 @@ -Speed up fetching of device list changes in `/sync` and `/keys/changes`. diff --git a/changelog.d/13099.misc b/changelog.d/13099.misc deleted file mode 100644 index 7f6492d587..0000000000 --- a/changelog.d/13099.misc +++ /dev/null @@ -1 +0,0 @@ -Add type hints to tests. diff --git a/changelog.d/13106.bugfix b/changelog.d/13106.bugfix deleted file mode 100644 index 0dc16bad08..0000000000 --- a/changelog.d/13106.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a long-standing bug where room directory requests would cause an internal server error if given a malformed room alias. 
\ No newline at end of file diff --git a/changelog.d/13112.doc b/changelog.d/13112.doc deleted file mode 100644 index 4b99951c70..0000000000 --- a/changelog.d/13112.doc +++ /dev/null @@ -1 +0,0 @@ -Remove documentation for the Delete Group Admin API which no longer exists. \ No newline at end of file diff --git a/changelog.d/13118.misc b/changelog.d/13118.misc deleted file mode 100644 index 3bb51962e7..0000000000 --- a/changelog.d/13118.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce DB usage of `/sync` when a large number of unread messages have recently been sent in a room. diff --git a/changelog.d/13123.removal b/changelog.d/13123.removal deleted file mode 100644 index f013f16163..0000000000 --- a/changelog.d/13123.removal +++ /dev/null @@ -1 +0,0 @@ -Remove the unspecced `DELETE /directory/list/room/{roomId}` endpoint, which hid rooms from the [public room directory](https://spec.matrix.org/v1.3/client-server-api/#listing-rooms). Instead, `PUT` to the same URL with a visibility of `"private"`. \ No newline at end of file diff --git a/changelog.d/13124.misc b/changelog.d/13124.misc deleted file mode 100644 index 513078f8d6..0000000000 --- a/changelog.d/13124.misc +++ /dev/null @@ -1 +0,0 @@ -Add type annotations to `tests.test_server`. diff --git a/debian/changelog b/debian/changelog index 2ca565a157..7fbd9baef6 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.62.0~rc1) stable; urgency=medium + + * New Synapse release 1.62.0rc1. + + -- Synapse Packaging team Tue, 28 Jun 2022 16:34:57 +0100 + matrix-synapse-py3 (1.61.1) stable; urgency=medium * New Synapse release 1.61.1. diff --git a/pyproject.toml b/pyproject.toml index df44ee3140..8b66d3a9e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ skip_gitignore = true [tool.poetry] name = "matrix-synapse" -version = "1.61.1" +version = "1.62.0rc1" description = "Homeserver for the Matrix decentralised comms protocol" authors = ["Matrix.org Team and Contributors "] license = "Apache-2.0" From bc9b0912cc147713c42e850fbfbb4ee396c8c839 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 28 Jun 2022 16:47:04 +0100 Subject: [PATCH 037/178] fix linting error from the 1.61.1 main -> develop merge --- synapse/rest/media/v1/preview_html.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/preview_html.py b/synapse/rest/media/v1/preview_html.py index afe4e29758..516d0434f0 100644 --- a/synapse/rest/media/v1/preview_html.py +++ b/synapse/rest/media/v1/preview_html.py @@ -20,7 +20,8 @@ from typing import ( Dict, Generator, Iterable, - List, Optional, + List, + Optional, Set, Union, ) From cdc02594491b9410f250f0adc4ea6d223aa3de7f Mon Sep 17 00:00:00 2001 From: jejo86 <28619134+jejo86@users.noreply.github.com> Date: Wed, 29 Jun 2022 11:24:10 +0200 Subject: [PATCH 038/178] Document the `--report-stats` argument (#13029) Signed-off-by: jejo86 <28619134+jejo86@users.noreply.github.com> --- changelog.d/13029.doc | 1 + docs/setup/installation.md | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13029.doc diff --git a/changelog.d/13029.doc b/changelog.d/13029.doc new file mode 100644 index 0000000000..d398f0fdbe --- /dev/null +++ b/changelog.d/13029.doc @@ -0,0 +1 @@ +Add an explanation of the `--report-stats` argument to the docs. 
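As a concrete illustration of the argument documented by this patch (the full command appears in the `docs/setup/installation.md` hunk below), a generate-config invocation that explicitly opts out of stats reporting might look like the following sketch; `example.com` and the config path are placeholders:

```bash
python -m synapse.app.homeserver \
    --server-name=example.com \
    --config-path=homeserver.yaml \
    --generate-config \
    --report-stats=no
```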
diff --git a/docs/setup/installation.md b/docs/setup/installation.md index 5bdefe2bc1..1580529fd1 100644 --- a/docs/setup/installation.md +++ b/docs/setup/installation.md @@ -232,7 +232,9 @@ python -m synapse.app.homeserver \ --report-stats=[yes|no] ``` -... substituting an appropriate value for `--server-name`. +... substituting an appropriate value for `--server-name` and choosing whether +or not to report usage statistics (hostname, Synapse version, uptime, total +users, etc.) to the developers via the `--report-stats` argument. This command will generate you a config file that you can then customise, but it will also generate a set of keys for you. These keys will allow your homeserver to From 92a0c18ef0f42b80e382667141e6593ab30e3776 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 29 Jun 2022 11:32:38 +0100 Subject: [PATCH 039/178] Improve performance of getting unread counts in rooms (#13119) --- changelog.d/13119.misc | 1 + synapse/_scripts/synapse_port_db.py | 3 +++ synapse/storage/databases/main/__init__.py | 2 +- .../databases/main/event_push_actions.py | 16 ++++++++++++--- synapse/storage/databases/main/stream.py | 20 +++++++++++++++++++ tests/storage/test_event_push_actions.py | 2 ++ 6 files changed, 40 insertions(+), 4 deletions(-) create mode 100644 changelog.d/13119.misc diff --git a/changelog.d/13119.misc b/changelog.d/13119.misc new file mode 100644 index 0000000000..3bb51962e7 --- /dev/null +++ b/changelog.d/13119.misc @@ -0,0 +1 @@ +Reduce DB usage of `/sync` when a large number of unread messages have recently been sent in a room. diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 9c06c837dc..f3f9c6d54c 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -270,6 +270,9 @@ class MockHomeserver: def get_instance_name(self) -> str: return "master" + def should_send_federation(self) -> bool: + return False + class Porter: def __init__( diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py index 57aaf778ec..a3d31d3737 100644 --- a/synapse/storage/databases/main/__init__.py +++ b/synapse/storage/databases/main/__init__.py @@ -87,7 +87,6 @@ class DataStore( RoomStore, RoomBatchStore, RegistrationStore, - StreamWorkerStore, ProfileStore, PresenceStore, TransactionWorkerStore, @@ -112,6 +111,7 @@ class DataStore( SearchStore, TagsStore, AccountDataStore, + StreamWorkerStore, OpenIdStore, ClientIpWorkerStore, DeviceStore, diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 80ca2fd0b6..eae41d7484 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -25,8 +25,8 @@ from synapse.storage.database import ( LoggingDatabaseConnection, LoggingTransaction, ) -from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.databases.main.receipts import ReceiptsWorkerStore +from synapse.storage.databases.main.stream import StreamWorkerStore from synapse.util import json_encoder from synapse.util.caches.descriptors import cached @@ -122,7 +122,7 @@ def _deserialize_action(actions: str, is_highlight: bool) -> List[Union[dict, st return DEFAULT_NOTIF_ACTION -class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBaseStore): +class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBaseStore): def __init__( self, database: DatabasePool, @@ 
-218,7 +218,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas retcol="event_id", ) - stream_ordering = self.get_stream_id_for_event_txn(txn, event_id) # type: ignore[attr-defined] + stream_ordering = self.get_stream_id_for_event_txn(txn, event_id) return self._get_unread_counts_by_pos_txn( txn, room_id, user_id, stream_ordering @@ -307,12 +307,22 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas actions that have been deleted from `event_push_actions` table. """ + # If there have been no events in the room since the stream ordering, + # there can't be any push actions either. + if not self._events_stream_cache.has_entity_changed(room_id, stream_ordering): + return 0, 0 + clause = "" args = [user_id, room_id, stream_ordering] if max_stream_ordering is not None: clause = "AND ea.stream_ordering <= ?" args.append(max_stream_ordering) + # If the max stream ordering is less than the min stream ordering, + # then obviously there are zero push actions in that range. + if max_stream_ordering <= stream_ordering: + return 0, 0 + sql = f""" SELECT COUNT(CASE WHEN notif = 1 THEN 1 END), diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 8e88784d3c..3a1df7776c 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -46,10 +46,12 @@ from typing import ( Set, Tuple, cast, + overload, ) import attr from frozendict import frozendict +from typing_extensions import Literal from twisted.internet import defer @@ -795,6 +797,24 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ) return RoomStreamToken(topo, stream_ordering) + @overload + def get_stream_id_for_event_txn( + self, + txn: LoggingTransaction, + event_id: str, + allow_none: Literal[False] = False, + ) -> int: + ... + + @overload + def get_stream_id_for_event_txn( + self, + txn: LoggingTransaction, + event_id: str, + allow_none: bool = False, + ) -> Optional[int]: + ... + def get_stream_id_for_event_txn( self, txn: LoggingTransaction, diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index ef069a8110..a5a2dab21c 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -86,6 +86,8 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): event.internal_metadata.is_outlier.return_value = False event.depth = stream + self.store._events_stream_cache.entity_has_changed(room_id, stream) + self.get_success( self.store.db_pool.simple_insert( table="events", From e714b8a057f65fe07b4f3939e018e57862980cdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20St=C3=BCckler?= Date: Wed, 29 Jun 2022 18:41:39 +0200 Subject: [PATCH 040/178] Fix documentation header for `allow_public_rooms_over_federation` (#13116) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Moritz Stückler Co-authored-by: Patrick Cloke --- changelog.d/13116.doc | 1 + docs/usage/configuration/config_documentation.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13116.doc diff --git a/changelog.d/13116.doc b/changelog.d/13116.doc new file mode 100644 index 0000000000..f99be50f44 --- /dev/null +++ b/changelog.d/13116.doc @@ -0,0 +1 @@ +Fix wrong section header for `allow_public_rooms_over_federation` in the homeserver config documentation. 
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 58a74ace48..19eb504496 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -317,7 +317,7 @@ Example configuration: allow_public_rooms_without_auth: true ``` --- -### `allow_public_rooms_without_auth` +### `allow_public_rooms_over_federation` If set to true, allows any other homeserver to fetch the server's public rooms directory via federation. Defaults to false. From 13e359aec8ae8be8dc56a036ae6d9f2bc1d07385 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0imon=20Brandner?= Date: Wed, 29 Jun 2022 19:12:45 +0200 Subject: [PATCH 041/178] Implement MSC3827: Filtering of `/publicRooms` by room type (#13031) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Šimon Brandner --- changelog.d/13031.feature | 1 + synapse/api/constants.py | 10 ++ synapse/config/experimental.py | 3 + synapse/handlers/room_list.py | 23 +++- synapse/handlers/stats.py | 3 + synapse/rest/client/versions.py | 2 + synapse/storage/databases/main/room.py | 126 +++++++++++++++++- synapse/storage/databases/main/stats.py | 10 +- .../72/01add_room_type_to_state_stats.sql | 19 +++ tests/rest/client/test_rooms.py | 92 ++++++++++++- tests/storage/databases/main/test_room.py | 69 ++++++++++ 11 files changed, 345 insertions(+), 13 deletions(-) create mode 100644 changelog.d/13031.feature create mode 100644 synapse/storage/schema/main/delta/72/01add_room_type_to_state_stats.sql diff --git a/changelog.d/13031.feature b/changelog.d/13031.feature new file mode 100644 index 0000000000..fee8e9d1ff --- /dev/null +++ b/changelog.d/13031.feature @@ -0,0 +1 @@ +Implement [MSC3827](https://github.com/matrix-org/matrix-spec-proposals/pull/3827): Filtering of /publicRooms by room type. diff --git a/synapse/api/constants.py b/synapse/api/constants.py index e1d31cabed..2653764119 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -259,3 +259,13 @@ class ReceiptTypes: READ: Final = "m.read" READ_PRIVATE: Final = "org.matrix.msc2285.read.private" FULLY_READ: Final = "m.fully_read" + + +class PublicRoomsFilterFields: + """Fields in the search filter for `/publicRooms` that we understand. + + As defined in https://spec.matrix.org/v1.3/client-server-api/#post_matrixclientv3publicrooms + """ + + GENERIC_SEARCH_TERM: Final = "generic_search_term" + ROOM_TYPES: Final = "org.matrix.msc3827.room_types" diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 0a285dba31..ee443cea00 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -87,3 +87,6 @@ class ExperimentalConfig(Config): # MSC3715: dir param on /relations. 
self.msc3715_enabled: bool = experimental.get("msc3715_enabled", False) + + # MSC3827: Filtering of /publicRooms by room type + self.msc3827_enabled: bool = experimental.get("msc3827_enabled", False) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index 183d4ae3c4..29868eb743 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -25,6 +25,7 @@ from synapse.api.constants import ( GuestAccess, HistoryVisibility, JoinRules, + PublicRoomsFilterFields, ) from synapse.api.errors import ( Codes, @@ -181,6 +182,7 @@ class RoomListHandler: == HistoryVisibility.WORLD_READABLE, "guest_can_join": room["guest_access"] == "can_join", "join_rule": room["join_rules"], + "org.matrix.msc3827.room_type": room["room_type"], } # Filter out Nones – rather omit the field altogether @@ -239,7 +241,9 @@ class RoomListHandler: response["chunk"] = results response["total_room_count_estimate"] = await self.store.count_public_rooms( - network_tuple, ignore_non_federatable=from_federation + network_tuple, + ignore_non_federatable=from_federation, + search_filter=search_filter, ) return response @@ -508,8 +512,21 @@ class RoomListNextBatch: def _matches_room_entry(room_entry: JsonDict, search_filter: dict) -> bool: - if search_filter and search_filter.get("generic_search_term", None): - generic_search_term = search_filter["generic_search_term"].upper() + """Determines whether the given search filter matches a room entry returned over + federation. + + Only used if the remote server does not support MSC2197 remote-filtered search, and + hence does not support MSC3827 filtering of `/publicRooms` by room type either. + + In this case, we cannot apply the `room_type` filter since no `room_type` field is + returned. + """ + if search_filter and search_filter.get( + PublicRoomsFilterFields.GENERIC_SEARCH_TERM, None + ): + generic_search_term = search_filter[ + PublicRoomsFilterFields.GENERIC_SEARCH_TERM + ].upper() if generic_search_term in room_entry.get("name", "").upper(): return True elif generic_search_term in room_entry.get("topic", "").upper(): diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index f45e06eb0e..5c01482acf 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -271,6 +271,9 @@ class StatsHandler: room_state["is_federatable"] = ( event_content.get(EventContentFields.FEDERATE, True) is True ) + room_type = event_content.get(EventContentFields.ROOM_TYPE) + if isinstance(room_type, str): + room_state["room_type"] = room_type elif typ == EventTypes.JoinRules: room_state["join_rules"] = event_content.get("join_rule") elif typ == EventTypes.RoomHistoryVisibility: diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index c1bd775fec..f4f06563dd 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -95,6 +95,8 @@ class VersionsRestServlet(RestServlet): "org.matrix.msc3026.busy_presence": self.config.experimental.msc3026_enabled, # Supports receiving private read receipts as per MSC2285 "org.matrix.msc2285": self.config.experimental.msc2285_enabled, + # Supports filtering of /publicRooms by room type MSC3827 + "org.matrix.msc3827": self.config.experimental.msc3827_enabled, # Adds support for importing historical messages as per MSC2716 "org.matrix.msc2716": self.config.experimental.msc2716_enabled, # Adds support for jump to date endpoints (/timestamp_to_event) as per MSC3030 diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py 
index 5760d3428e..d8026e3fac 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -32,12 +32,17 @@ from typing import ( import attr -from synapse.api.constants import EventContentFields, EventTypes, JoinRules +from synapse.api.constants import ( + EventContentFields, + EventTypes, + JoinRules, + PublicRoomsFilterFields, +) from synapse.api.errors import StoreError from synapse.api.room_versions import RoomVersion, RoomVersions from synapse.config.homeserver import HomeServerConfig from synapse.events import EventBase -from synapse.storage._base import SQLBaseStore, db_to_json +from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage.database import ( DatabasePool, LoggingDatabaseConnection, @@ -199,10 +204,29 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): desc="get_public_room_ids", ) + def _construct_room_type_where_clause( + self, room_types: Union[List[Union[str, None]], None] + ) -> Tuple[Union[str, None], List[str]]: + if not room_types or not self.config.experimental.msc3827_enabled: + return None, [] + else: + # We use None when we want get rooms without a type + is_null_clause = "" + if None in room_types: + is_null_clause = "OR room_type IS NULL" + room_types = [value for value in room_types if value is not None] + + list_clause, args = make_in_list_sql_clause( + self.database_engine, "room_type", room_types + ) + + return f"({list_clause} {is_null_clause})", args + async def count_public_rooms( self, network_tuple: Optional[ThirdPartyInstanceID], ignore_non_federatable: bool, + search_filter: Optional[dict], ) -> int: """Counts the number of public rooms as tracked in the room_stats_current and room_stats_state table. @@ -210,11 +234,20 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): Args: network_tuple ignore_non_federatable: If true filters out non-federatable rooms + search_filter """ def _count_public_rooms_txn(txn: LoggingTransaction) -> int: query_args = [] + room_type_clause, args = self._construct_room_type_where_clause( + search_filter.get(PublicRoomsFilterFields.ROOM_TYPES, None) + if search_filter + else None + ) + room_type_clause = f" AND {room_type_clause}" if room_type_clause else "" + query_args += args + if network_tuple: if network_tuple.appservice_id: published_sql = """ @@ -249,6 +282,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): OR join_rules = '{JoinRules.KNOCK_RESTRICTED}' OR history_visibility = 'world_readable' ) + {room_type_clause} AND joined_members > 0 """ @@ -347,8 +381,12 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): if ignore_non_federatable: where_clauses.append("is_federatable") - if search_filter and search_filter.get("generic_search_term", None): - search_term = "%" + search_filter["generic_search_term"] + "%" + if search_filter and search_filter.get( + PublicRoomsFilterFields.GENERIC_SEARCH_TERM, None + ): + search_term = ( + "%" + search_filter[PublicRoomsFilterFields.GENERIC_SEARCH_TERM] + "%" + ) where_clauses.append( """ @@ -365,6 +403,15 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): search_term.lower(), ] + room_type_clause, args = self._construct_room_type_where_clause( + search_filter.get(PublicRoomsFilterFields.ROOM_TYPES, None) + if search_filter + else None + ) + if room_type_clause: + where_clauses.append(room_type_clause) + query_args += args + where_clause = "" if where_clauses: where_clause = " AND " + " AND ".join(where_clauses) @@ -373,7 +420,7 @@ class 
RoomWorkerStore(CacheInvalidationWorkerStore): sql = f""" SELECT room_id, name, topic, canonical_alias, joined_members, - avatar, history_visibility, guest_access, join_rules + avatar, history_visibility, guest_access, join_rules, room_type FROM ( {published_sql} ) published @@ -1166,6 +1213,7 @@ class _BackgroundUpdates: POPULATE_ROOM_DEPTH_MIN_DEPTH2 = "populate_room_depth_min_depth2" REPLACE_ROOM_DEPTH_MIN_DEPTH = "replace_room_depth_min_depth" POPULATE_ROOMS_CREATOR_COLUMN = "populate_rooms_creator_column" + ADD_ROOM_TYPE_COLUMN = "add_room_type_column" _REPLACE_ROOM_DEPTH_SQL_COMMANDS = ( @@ -1200,6 +1248,11 @@ class RoomBackgroundUpdateStore(SQLBaseStore): self._background_add_rooms_room_version_column, ) + self.db_pool.updates.register_background_update_handler( + _BackgroundUpdates.ADD_ROOM_TYPE_COLUMN, + self._background_add_room_type_column, + ) + # BG updates to change the type of room_depth.min_depth self.db_pool.updates.register_background_update_handler( _BackgroundUpdates.POPULATE_ROOM_DEPTH_MIN_DEPTH2, @@ -1569,6 +1622,69 @@ class RoomBackgroundUpdateStore(SQLBaseStore): return batch_size + async def _background_add_room_type_column( + self, progress: JsonDict, batch_size: int + ) -> int: + """Background update to go and add room_type information to `room_stats_state` + table from `event_json` table. + """ + + last_room_id = progress.get("room_id", "") + + def _background_add_room_type_column_txn( + txn: LoggingTransaction, + ) -> bool: + sql = """ + SELECT state.room_id, json FROM event_json + INNER JOIN current_state_events AS state USING (event_id) + WHERE state.room_id > ? AND type = 'm.room.create' + ORDER BY state.room_id + LIMIT ? + """ + + txn.execute(sql, (last_room_id, batch_size)) + room_id_to_create_event_results = txn.fetchall() + + new_last_room_id = None + for room_id, event_json in room_id_to_create_event_results: + event_dict = db_to_json(event_json) + + room_type = event_dict.get("content", {}).get( + EventContentFields.ROOM_TYPE, None + ) + if isinstance(room_type, str): + self.db_pool.simple_update_txn( + txn, + table="room_stats_state", + keyvalues={"room_id": room_id}, + updatevalues={"room_type": room_type}, + ) + + new_last_room_id = room_id + + if new_last_room_id is None: + return True + + self.db_pool.updates._background_update_progress_txn( + txn, + _BackgroundUpdates.ADD_ROOM_TYPE_COLUMN, + {"room_id": new_last_room_id}, + ) + + return False + + end = await self.db_pool.runInteraction( + "_background_add_room_type_column", + _background_add_room_type_column_txn, + ) + + if end: + await self.db_pool.updates._end_background_update( + _BackgroundUpdates.ADD_ROOM_TYPE_COLUMN + ) + + return batch_size + class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore): def __init__( diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py index 82851ffa95..b4c652acf3 100644 --- a/synapse/storage/databases/main/stats.py +++ b/synapse/storage/databases/main/stats.py @@ -16,7 +16,7 @@ import logging from enum import Enum from itertools import chain -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, cast +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast from typing_extensions import Counter @@ -238,6 +238,7 @@ class StatsStore(StateDeltasStore): * avatar * canonical_alias * guest_access + * room_type A is_federatable key can also be included with a boolean value. 
@@ -263,6 +264,7 @@ class StatsStore(StateDeltasStore): "avatar", "canonical_alias", "guest_access", + "room_type", ): field = fields.get(col, sentinel) if field is not sentinel and (not isinstance(field, str) or "\0" in field): @@ -572,7 +574,7 @@ class StatsStore(StateDeltasStore): state_event_map = await self.get_events(event_ids, get_prev_content=False) # type: ignore[attr-defined] - room_state = { + room_state: Dict[str, Union[None, bool, str]] = { "join_rules": None, "history_visibility": None, "encryption": None, @@ -581,6 +583,7 @@ class StatsStore(StateDeltasStore): "avatar": None, "canonical_alias": None, "is_federatable": True, + "room_type": None, } for event in state_event_map.values(): @@ -604,6 +607,9 @@ class StatsStore(StateDeltasStore): room_state["is_federatable"] = ( event.content.get(EventContentFields.FEDERATE, True) is True ) + room_type = event.content.get(EventContentFields.ROOM_TYPE) + if isinstance(room_type, str): + room_state["room_type"] = room_type await self.update_room_state(room_id, room_state) diff --git a/synapse/storage/schema/main/delta/72/01add_room_type_to_state_stats.sql b/synapse/storage/schema/main/delta/72/01add_room_type_to_state_stats.sql new file mode 100644 index 0000000000..d5e0765471 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/01add_room_type_to_state_stats.sql @@ -0,0 +1,19 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +ALTER TABLE room_stats_state ADD room_type TEXT; + +INSERT INTO background_updates (update_name, progress_json) + VALUES ('add_room_type_column', '{}'); diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 35c59ee9e0..1ccd96a207 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -18,7 +18,7 @@ """Tests REST events for /rooms paths.""" import json -from typing import Any, Dict, Iterable, List, Optional, Union +from typing import Any, Dict, Iterable, List, Optional, Tuple, Union from unittest.mock import Mock, call from urllib import parse as urlparse @@ -33,7 +33,9 @@ from synapse.api.constants import ( EventContentFields, EventTypes, Membership, + PublicRoomsFilterFields, RelationTypes, + RoomTypes, ) from synapse.api.errors import Codes, HttpResponseException from synapse.handlers.pagination import PurgeStatus @@ -1858,6 +1860,90 @@ class PublicRoomsRestrictedTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.code, 200, channel.result) +class PublicRoomsRoomTypeFilterTestCase(unittest.HomeserverTestCase): + + servlets = [ + synapse.rest.admin.register_servlets_for_client_rest_resource, + room.register_servlets, + login.register_servlets, + ] + + def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: + + config = self.default_config() + config["allow_public_rooms_without_auth"] = True + config["experimental_features"] = {"msc3827_enabled": True} + self.hs = self.setup_test_homeserver(config=config) + self.url = b"/_matrix/client/r0/publicRooms" + + return self.hs + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + user = self.register_user("alice", "pass") + self.token = self.login(user, "pass") + + # Create a room + self.helper.create_room_as( + user, + is_public=True, + extra_content={"visibility": "public"}, + tok=self.token, + ) + # Create a space + self.helper.create_room_as( + user, + is_public=True, + extra_content={ + "visibility": "public", + "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE}, + }, + tok=self.token, + ) + + def make_public_rooms_request( + self, room_types: Union[List[Union[str, None]], None] + ) -> Tuple[List[Dict[str, Any]], int]: + channel = self.make_request( + "POST", + self.url, + {"filter": {PublicRoomsFilterFields.ROOM_TYPES: room_types}}, + self.token, + ) + chunk = channel.json_body["chunk"] + count = channel.json_body["total_room_count_estimate"] + + self.assertEqual(len(chunk), count) + + return chunk, count + + def test_returns_both_rooms_and_spaces_if_no_filter(self) -> None: + chunk, count = self.make_public_rooms_request(None) + + self.assertEqual(count, 2) + + def test_returns_only_rooms_based_on_filter(self) -> None: + chunk, count = self.make_public_rooms_request([None]) + + self.assertEqual(count, 1) + self.assertEqual(chunk[0].get("org.matrix.msc3827.room_type", None), None) + + def test_returns_only_space_based_on_filter(self) -> None: + chunk, count = self.make_public_rooms_request(["m.space"]) + + self.assertEqual(count, 1) + self.assertEqual(chunk[0].get("org.matrix.msc3827.room_type", None), "m.space") + + def test_returns_both_rooms_and_space_based_on_filter(self) -> None: + chunk, count = self.make_public_rooms_request(["m.space", None]) + + self.assertEqual(count, 2) + + def test_returns_both_rooms_and_spaces_if_array_is_empty(self) -> None: + chunk, count = self.make_public_rooms_request([]) + + self.assertEqual(count, 2) + + class 
PublicRoomsTestRemoteSearchFallbackTestCase(unittest.HomeserverTestCase): """Test that we correctly fallback to local filtering if a remote server doesn't support search. @@ -1882,7 +1968,7 @@ class PublicRoomsTestRemoteSearchFallbackTestCase(unittest.HomeserverTestCase): "Simple test for searching rooms over federation" self.federation_client.get_public_rooms.return_value = make_awaitable({}) # type: ignore[attr-defined] - search_filter = {"generic_search_term": "foobar"} + search_filter = {PublicRoomsFilterFields.GENERIC_SEARCH_TERM: "foobar"} channel = self.make_request( "POST", @@ -1911,7 +1997,7 @@ class PublicRoomsTestRemoteSearchFallbackTestCase(unittest.HomeserverTestCase): make_awaitable({}), ) - search_filter = {"generic_search_term": "foobar"} + search_filter = {PublicRoomsFilterFields.GENERIC_SEARCH_TERM: "foobar"} channel = self.make_request( "POST", diff --git a/tests/storage/databases/main/test_room.py b/tests/storage/databases/main/test_room.py index 9abd0cb446..1edb619630 100644 --- a/tests/storage/databases/main/test_room.py +++ b/tests/storage/databases/main/test_room.py @@ -12,6 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json + +from synapse.api.constants import RoomTypes from synapse.rest import admin from synapse.rest.client import login, room from synapse.storage.databases.main.room import _BackgroundUpdates @@ -91,3 +94,69 @@ class RoomBackgroundUpdateStoreTestCase(HomeserverTestCase): ) ) self.assertEqual(room_creator_after, self.user_id) + + def test_background_add_room_type_column(self): + """Test that the background update to populate the `room_type` column in + `room_stats_state` works properly. + """ + + # Create a room without a type + room_id = self._generate_room() + + # Get event_id of the m.room.create event + event_id = self.get_success( + self.store.db_pool.simple_select_one_onecol( + table="current_state_events", + keyvalues={ + "room_id": room_id, + "type": "m.room.create", + }, + retcol="event_id", + ) + ) + + # Fake a room creation event with a room type + event = { + "content": { + "creator": "@user:server.org", + "room_version": "9", + "type": RoomTypes.SPACE, + }, + "type": "m.room.create", + } + self.get_success( + self.store.db_pool.simple_update( + table="event_json", + keyvalues={"event_id": event_id}, + updatevalues={"json": json.dumps(event)}, + desc="test", + ) + ) + + # Insert and run the background update + self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.ADD_ROOM_TYPE_COLUMN, + "progress_json": "{}", + }, + ) + ) + + # ... 
and tell the DataStore that it hasn't finished all updates yet + self.store.db_pool.updates._all_done = False + + # Now let's actually drive the updates to completion + self.wait_for_background_updates() + + # Make sure the background update filled in the room type + room_type_after = self.get_success( + self.store.db_pool.simple_select_one_onecol( + table="room_stats_state", + keyvalues={"room_id": room_id}, + retcol="room_type", + allow_none=True, + ) + ) + self.assertEqual(room_type_after, RoomTypes.SPACE) From 4d3b8fb23f0c9288b311efd7def83b641bda82b8 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 30 Jun 2022 10:43:24 +0200 Subject: [PATCH 042/178] Don't actually one-line the SQL statements we send to the DB (#13129) --- changelog.d/13129.misc | 1 + synapse/storage/database.py | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 changelog.d/13129.misc diff --git a/changelog.d/13129.misc b/changelog.d/13129.misc new file mode 100644 index 0000000000..4c2dbb7057 --- /dev/null +++ b/changelog.d/13129.misc @@ -0,0 +1 @@ +Only one-line SQL statements for logging and tracing. diff --git a/synapse/storage/database.py b/synapse/storage/database.py index e8c63cf567..e21ab08515 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -366,10 +366,11 @@ class LoggingTransaction: *args: P.args, **kwargs: P.kwargs, ) -> R: - sql = self._make_sql_one_line(sql) + # Generate a one-line version of the SQL to better log it. + one_line_sql = self._make_sql_one_line(sql) # TODO(paul): Maybe use 'info' and 'debug' for values? - sql_logger.debug("[SQL] {%s} %s", self.name, sql) + sql_logger.debug("[SQL] {%s} %s", self.name, one_line_sql) sql = self.database_engine.convert_param_style(sql) if args: @@ -389,7 +390,7 @@ class LoggingTransaction: "db.query", tags={ opentracing.tags.DATABASE_TYPE: "sql", - opentracing.tags.DATABASE_STATEMENT: sql, + opentracing.tags.DATABASE_STATEMENT: one_line_sql, }, ): return func(sql, *args, **kwargs) From 80c7a06777507beb5401718dd07fbcb1cd377de1 Mon Sep 17 00:00:00 2001 From: David Teller Date: Thu, 30 Jun 2022 11:44:47 +0200 Subject: [PATCH 043/178] Rate limiting invites per issuer (#13125) Co-authored-by: reivilibre --- changelog.d/13125.feature | 1 + synapse/config/ratelimiting.py | 5 +++++ synapse/handlers/room_member.py | 20 ++++++++++++++++++-- 3 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13125.feature diff --git a/changelog.d/13125.feature b/changelog.d/13125.feature new file mode 100644 index 0000000000..9b0f609541 --- /dev/null +++ b/changelog.d/13125.feature @@ -0,0 +1 @@ +Add a rate limit for local users sending invites. 
\ No newline at end of file diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index d4090a1f9a..4fc1784efe 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -136,6 +136,11 @@ class RatelimitConfig(Config): defaults={"per_second": 0.003, "burst_count": 5}, ) + self.rc_invites_per_issuer = RateLimitConfig( + config.get("rc_invites", {}).get("per_issuer", {}), + defaults={"per_second": 0.3, "burst_count": 10}, + ) + self.rc_third_party_invite = RateLimitConfig( config.get("rc_third_party_invite", {}), defaults={ diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index bf6bae1232..5648ab4bf4 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -101,19 +101,33 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): burst_count=hs.config.ratelimiting.rc_joins_remote.burst_count, ) + # Ratelimiter for invites, keyed by room (across all issuers, all + # recipients). self._invites_per_room_limiter = Ratelimiter( store=self.store, clock=self.clock, rate_hz=hs.config.ratelimiting.rc_invites_per_room.per_second, burst_count=hs.config.ratelimiting.rc_invites_per_room.burst_count, ) - self._invites_per_user_limiter = Ratelimiter( + + # Ratelimiter for invites, keyed by recipient (across all rooms, all + # issuers). + self._invites_per_recipient_limiter = Ratelimiter( store=self.store, clock=self.clock, rate_hz=hs.config.ratelimiting.rc_invites_per_user.per_second, burst_count=hs.config.ratelimiting.rc_invites_per_user.burst_count, ) + # Ratelimiter for invites, keyed by issuer (across all rooms, all + # recipients). + self._invites_per_issuer_limiter = Ratelimiter( + store=self.store, + clock=self.clock, + rate_hz=hs.config.ratelimiting.rc_invites_per_issuer.per_second, + burst_count=hs.config.ratelimiting.rc_invites_per_issuer.burst_count, + ) + self._third_party_invite_limiter = Ratelimiter( store=self.store, clock=self.clock, @@ -258,7 +272,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): if room_id: await self._invites_per_room_limiter.ratelimit(requester, room_id) - await self._invites_per_user_limiter.ratelimit(requester, invitee_user_id) + await self._invites_per_recipient_limiter.ratelimit(requester, invitee_user_id) + if requester is not None: + await self._invites_per_issuer_limiter.ratelimit(requester) async def _local_membership_update( self, From 09f6e430254889a8633787a09f154075a17aff23 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 30 Jun 2022 11:45:47 +0200 Subject: [PATCH 044/178] Actually typecheck `tests.test_server` (#13135) --- changelog.d/13135.misc | 1 + mypy.ini | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 changelog.d/13135.misc diff --git a/changelog.d/13135.misc b/changelog.d/13135.misc new file mode 100644 index 0000000000..f096dd8749 --- /dev/null +++ b/changelog.d/13135.misc @@ -0,0 +1 @@ +Enforce type annotations for `tests.test_server`. 
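For a local spot-check of the `mypy.ini` change below, one option (assuming a development checkout with the type-checking dependencies installed, e.g. via `poetry install --extras all`) is to run mypy directly against the newly un-excluded module:

```bash
poetry run mypy tests/test_server.py
```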
diff --git a/mypy.ini b/mypy.ini index 4b08f45c6d..e062cf43a2 100644 --- a/mypy.ini +++ b/mypy.ini @@ -56,7 +56,6 @@ exclude = (?x) |tests/server.py |tests/server_notices/test_resource_limits_server_notices.py |tests/test_metrics.py - |tests/test_server.py |tests/test_state.py |tests/test_terms_auth.py |tests/util/caches/test_cached_call.py From 9667bad55d8b50fe08990a8cfd2ac82c8540bcc1 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Thu, 30 Jun 2022 12:58:12 +0100 Subject: [PATCH 045/178] Improve startup times in Complement test runs against workers, particularly in CPU-constrained environments. (#13127) Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- .github/workflows/tests.yml | 49 +---- changelog.d/13127.misc | 1 + .../complement/conf/start_for_complement.sh | 3 + .../conf-workers/synapse.supervisord.conf.j2 | 26 ++- docker/conf/log.config | 4 + docker/configure_workers_and_start.py | 7 + docker/start.py | 6 +- synapse/app/_base.py | 8 +- synapse/app/complement_fork_starter.py | 190 ++++++++++++++++++ 9 files changed, 243 insertions(+), 51 deletions(-) create mode 100644 changelog.d/13127.misc create mode 100644 synapse/app/complement_fork_starter.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2e4ee723d3..a775f70c4e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -328,51 +328,8 @@ jobs: - arrangement: monolith database: Postgres - steps: - # The path is set via a file given by $GITHUB_PATH. We need both Go 1.17 and GOPATH on the path to run Complement. - # See https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-system-path - - name: "Set Go Version" - run: | - # Add Go 1.17 to the PATH: see https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu2004-Readme.md#environment-variables-2 - echo "$GOROOT_1_17_X64/bin" >> $GITHUB_PATH - # Add the Go path to the PATH: We need this so we can call gotestfmt - echo "~/go/bin" >> $GITHUB_PATH - - - name: "Install Complement Dependencies" - run: | - sudo apt-get update && sudo apt-get install -y libolm3 libolm-dev - go get -v github.com/haveyoudebuggedit/gotestfmt/v2/cmd/gotestfmt@latest - - - name: Run actions/checkout@v2 for synapse - uses: actions/checkout@v2 - with: - path: synapse - - - name: "Install custom gotestfmt template" - run: | - mkdir .gotestfmt/github -p - cp synapse/.ci/complement_package.gotpl .gotestfmt/github/package.gotpl - - # Attempt to check out the same branch of Complement as the PR. If it - # doesn't exist, fallback to HEAD. - - name: Checkout complement - run: synapse/.ci/scripts/checkout_complement.sh - - - run: | - set -o pipefail - POSTGRES=${{ (matrix.database == 'Postgres') && 1 || '' }} COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | gotestfmt - shell: bash - name: Run Complement Tests - - # We only run the workers tests on `develop` for now, because they're too slow to wait for on PRs. - # Sadly, you can't have an `if` condition on the value of a matrix, so this is a temporary, separate job for now. - # GitHub Actions doesn't support YAML anchors, so it's full-on duplication for now. - complement-developonly: - if: "${{ !failure() && !cancelled() && (github.ref == 'refs/heads/develop') }}" - needs: linting-done - runs-on: ubuntu-latest - - name: "Complement Workers (develop only)" + - arrangement: workers + database: Postgres steps: # The path is set via a file given by $GITHUB_PATH. 
We need both Go 1.17 and GOPATH on the path to run Complement. @@ -406,7 +363,7 @@ jobs: - run: | set -o pipefail - WORKERS=1 COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | gotestfmt + POSTGRES=${{ (matrix.database == 'Postgres') && 1 || '' }} WORKERS=${{ (matrix.arrangement == 'workers') && 1 || '' }} COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | gotestfmt shell: bash name: Run Complement Tests diff --git a/changelog.d/13127.misc b/changelog.d/13127.misc new file mode 100644 index 0000000000..1414811e0a --- /dev/null +++ b/changelog.d/13127.misc @@ -0,0 +1 @@ +Improve startup times in Complement test runs against workers, particularly in CPU-constrained environments. \ No newline at end of file diff --git a/docker/complement/conf/start_for_complement.sh b/docker/complement/conf/start_for_complement.sh index 773c7db22f..cc6482f763 100755 --- a/docker/complement/conf/start_for_complement.sh +++ b/docker/complement/conf/start_for_complement.sh @@ -59,6 +59,9 @@ if [[ -n "$SYNAPSE_COMPLEMENT_USE_WORKERS" ]]; then synchrotron, \ appservice, \ pusher" + + # Improve startup times by using a launcher based on fork() + export SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER=1 else # Empty string here means 'main process only' export SYNAPSE_WORKER_TYPES="" diff --git a/docker/conf-workers/synapse.supervisord.conf.j2 b/docker/conf-workers/synapse.supervisord.conf.j2 index 6443450491..481eb4fc92 100644 --- a/docker/conf-workers/synapse.supervisord.conf.j2 +++ b/docker/conf-workers/synapse.supervisord.conf.j2 @@ -1,3 +1,24 @@ +{% if use_forking_launcher %} +[program:synapse_fork] +command=/usr/local/bin/python -m synapse.app.complement_fork_starter + {{ main_config_path }} + synapse.app.homeserver + --config-path="{{ main_config_path }}" + --config-path=/conf/workers/shared.yaml + {%- for worker in workers %} + -- {{ worker.app }} + --config-path="{{ main_config_path }}" + --config-path=/conf/workers/shared.yaml + --config-path=/conf/workers/{{ worker.name }}.yaml + {%- endfor %} +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 +autorestart=unexpected +exitcodes=0 + +{% else %} [program:synapse_main] command=/usr/local/bin/prefix-log /usr/local/bin/python -m synapse.app.homeserver --config-path="{{ main_config_path }}" @@ -13,7 +34,7 @@ autorestart=unexpected exitcodes=0 -{% for worker in workers %} + {% for worker in workers %} [program:synapse_{{ worker.name }}] command=/usr/local/bin/prefix-log /usr/local/bin/python -m {{ worker.app }} --config-path="{{ main_config_path }}" @@ -27,4 +48,5 @@ stdout_logfile_maxbytes=0 stderr_logfile=/dev/stderr stderr_logfile_maxbytes=0 -{% endfor %} + {% endfor %} +{% endif %} diff --git a/docker/conf/log.config b/docker/conf/log.config index dc8c70befd..d9e85aa533 100644 --- a/docker/conf/log.config +++ b/docker/conf/log.config @@ -2,7 +2,11 @@ version: 1 formatters: precise: + {% if include_worker_name_in_log_line %} + format: '{{ worker_name }} | %(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s - %(message)s' + {% else %} format: '%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s - %(message)s' + {% endif %} handlers: {% if LOG_FILE_PATH %} diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index 2134b648d5..4521f99eb4 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -26,6 +26,9 @@ # * SYNAPSE_TLS_CERT: Path to a TLS 
certificate in PEM format. # * SYNAPSE_TLS_KEY: Path to a TLS key. If this and SYNAPSE_TLS_CERT are specified, # Nginx will be configured to serve TLS on port 8448. +# * SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER: Whether to use the forking launcher, +# only intended for usage in Complement at the moment. +# No stability guarantees are provided. # # NOTE: According to Complement's ENTRYPOINT expectations for a homeserver image (as defined # in the project's README), this script may be run multiple times, and functionality should @@ -525,6 +528,7 @@ def generate_worker_files( "/etc/supervisor/conf.d/synapse.conf", workers=worker_descriptors, main_config_path=config_path, + use_forking_launcher=environ.get("SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER"), ) # healthcheck config @@ -560,6 +564,9 @@ def generate_worker_log_config( log_config_filepath, worker_name=worker_name, **extra_log_template_args, + include_worker_name_in_log_line=environ.get( + "SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER" + ), ) return log_config_filepath diff --git a/docker/start.py b/docker/start.py index 4ac8f03477..5a98dce551 100755 --- a/docker/start.py +++ b/docker/start.py @@ -110,7 +110,11 @@ def generate_config_from_template( log_config_file = environ["SYNAPSE_LOG_CONFIG"] log("Generating log config file " + log_config_file) - convert("/conf/log.config", log_config_file, environ) + convert( + "/conf/log.config", + log_config_file, + {**environ, "include_worker_name_in_log_line": False}, + ) # Hopefully we already have a signing key, but generate one if not. args = [ diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 363ac98ea9..923891ae0d 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -106,7 +106,9 @@ def register_sighup(func: Callable[P, None], *args: P.args, **kwargs: P.kwargs) def start_worker_reactor( appname: str, config: HomeServerConfig, - run_command: Callable[[], None] = reactor.run, + # Use a lambda to avoid binding to a given reactor at import time. + # (needed when synapse.app.complement_fork_starter is being used) + run_command: Callable[[], None] = lambda: reactor.run(), ) -> None: """Run the reactor in the main process @@ -141,7 +143,9 @@ def start_reactor( daemonize: bool, print_pidfile: bool, logger: logging.Logger, - run_command: Callable[[], None] = reactor.run, + # Use a lambda to avoid binding to a given reactor at import time. + # (needed when synapse.app.complement_fork_starter is being used) + run_command: Callable[[], None] = lambda: reactor.run(), ) -> None: """Run the reactor in the main process diff --git a/synapse/app/complement_fork_starter.py b/synapse/app/complement_fork_starter.py new file mode 100644 index 0000000000..89eb07df27 --- /dev/null +++ b/synapse/app/complement_fork_starter.py @@ -0,0 +1,190 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# ## What this script does +# +# This script spawns multiple workers, whilst only going through the code loading +# process once. 
The net effect is that start-up time for a swarm of workers is +# reduced, particularly in CPU-constrained environments. +# +# Before the workers are spawned, the database is prepared in order to avoid the +# workers racing. +# +# ## Stability +# +# This script is only intended for use within the Synapse images for the +# Complement test suite. +# There are currently no stability guarantees whatsoever; especially not about: +# - whether it will continue to exist in future versions; +# - the format of its command-line arguments; or +# - any details about its behaviour or principles of operation. +# +# ## Usage +# +# The first argument should be the path to the database configuration, used to +# set up the database. The rest of the arguments are used as follows: +# Each worker is specified as an argument group (each argument group is +# separated by '--'). +# The first argument in each argument group is the Python module name of the application +# to start. Further arguments are then passed to that module as-is. +# +# ## Example +# +# python -m synapse.app.complement_fork_starter path_to_db_config.yaml \ +# synapse.app.homeserver [args..] -- \ +# synapse.app.generic_worker [args..] -- \ +# ... +# synapse.app.generic_worker [args..] +# +import argparse +import importlib +import itertools +import multiprocessing +import sys +from typing import Any, Callable, List + +from twisted.internet.main import installReactor + + +class ProxiedReactor: + """ + Twisted tracks the 'installed' reactor as a global variable. + (Actually, it does some module trickery, but the effect is similar.) + + The default EpollReactor is buggy if it's created before a process is + forked, then used in the child. + See https://twistedmatrix.com/trac/ticket/4759#comment:17. + + However, importing certain Twisted modules will automatically create and + install a reactor if one hasn't already been installed. + It's not normally possible to re-install a reactor. + + Given the goal of launching workers with fork() to only import the code once, + this presents a conflict. + Our work around is to 'install' this ProxiedReactor which prevents Twisted + from creating and installing one, but which lets us replace the actual reactor + in use later on. + """ + + def __init__(self) -> None: + self.___reactor_target: Any = None + + def _install_real_reactor(self, new_reactor: Any) -> None: + """ + Install a real reactor for this ProxiedReactor to forward lookups onto. + + This method is specific to our ProxiedReactor and should not clash with + any names used on an actual Twisted reactor. + """ + self.___reactor_target = new_reactor + + def __getattr__(self, attr_name: str) -> Any: + return getattr(self.___reactor_target, attr_name) + + +def _worker_entrypoint( + func: Callable[[], None], proxy_reactor: ProxiedReactor, args: List[str] +) -> None: + """ + Entrypoint for a forked worker process. + + We just need to set up the command-line arguments, create our real reactor + and then kick off the worker's main() function. + """ + + sys.argv = args + + from twisted.internet.epollreactor import EPollReactor + + proxy_reactor._install_real_reactor(EPollReactor()) + func() + + +def main() -> None: + """ + Entrypoint for the forking launcher. + """ + parser = argparse.ArgumentParser() + parser.add_argument("db_config", help="Path to database config file") + parser.add_argument( + "args", + nargs="...", + help="Argument groups separated by `--`. " + "The first argument of each group is a Synapse app name. 
" + "Subsequent arguments are passed through.", + ) + ns = parser.parse_args() + + # Split up the subsequent arguments into each workers' arguments; + # `--` is our delimiter of choice. + args_by_worker: List[List[str]] = [ + list(args) + for cond, args in itertools.groupby(ns.args, lambda ele: ele != "--") + if cond and args + ] + + # Prevent Twisted from installing a shared reactor that all the workers will + # inherit when we fork(), by installing our own beforehand. + proxy_reactor = ProxiedReactor() + installReactor(proxy_reactor) + + # Import the entrypoints for all the workers. + worker_functions = [] + for worker_args in args_by_worker: + worker_module = importlib.import_module(worker_args[0]) + worker_functions.append(worker_module.main) + + # We need to prepare the database first as otherwise all the workers will + # try to create a schema version table and some will crash out. + from synapse._scripts import update_synapse_database + + update_proc = multiprocessing.Process( + target=_worker_entrypoint, + args=( + update_synapse_database.main, + proxy_reactor, + [ + "update_synapse_database", + "--database-config", + ns.db_config, + "--run-background-updates", + ], + ), + ) + print("===== PREPARING DATABASE =====", file=sys.stderr) + update_proc.start() + update_proc.join() + print("===== PREPARED DATABASE =====", file=sys.stderr) + + # At this point, we've imported all the main entrypoints for all the workers. + # Now we basically just fork() out to create the workers we need. + # Because we're using fork(), all the workers get a clone of this launcher's + # memory space and don't need to repeat the work of loading the code! + # Instead of using fork() directly, we use the multiprocessing library, + # which uses fork() on Unix platforms. + processes = [] + for (func, worker_args) in zip(worker_functions, args_by_worker): + process = multiprocessing.Process( + target=_worker_entrypoint, args=(func, proxy_reactor, worker_args) + ) + process.start() + processes.append(process) + + # Be a good parent and wait for our children to die before exiting. + for process in processes: + process.join() + + +if __name__ == "__main__": + main() From 6ad012ef89c966cbb3616c1be63d964db48d49ca Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 30 Jun 2022 09:05:06 -0400 Subject: [PATCH 046/178] More type hints for `synapse.logging` (#13103) Completes type hints for synapse.logging.scopecontextmanager and (partially) for synapse.logging.opentracing. --- changelog.d/13103.misc | 1 + mypy.ini | 3 -- synapse/logging/opentracing.py | 61 +++++++++++++++----------- synapse/logging/scopecontextmanager.py | 35 ++++++++------- tests/logging/test_opentracing.py | 2 +- 5 files changed, 56 insertions(+), 46 deletions(-) create mode 100644 changelog.d/13103.misc diff --git a/changelog.d/13103.misc b/changelog.d/13103.misc new file mode 100644 index 0000000000..4de5f9e905 --- /dev/null +++ b/changelog.d/13103.misc @@ -0,0 +1 @@ +Add missing type hints to `synapse.logging`. diff --git a/mypy.ini b/mypy.ini index e062cf43a2..b9b16860db 100644 --- a/mypy.ini +++ b/mypy.ini @@ -88,9 +88,6 @@ disallow_untyped_defs = False [mypy-synapse.logging.opentracing] disallow_untyped_defs = False -[mypy-synapse.logging.scopecontextmanager] -disallow_untyped_defs = False - [mypy-synapse.metrics._reactor_metrics] disallow_untyped_defs = False # This module imports select.epoll. That exists on Linux, but doesn't on macOS. 
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 903ec40c86..50c57940f9 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -164,6 +164,7 @@ Gotchas with an active span? """ import contextlib +import enum import inspect import logging import re @@ -268,7 +269,7 @@ try: _reporter: Reporter = attr.Factory(Reporter) - def set_process(self, *args, **kwargs): + def set_process(self, *args: Any, **kwargs: Any) -> None: return self._reporter.set_process(*args, **kwargs) def report_span(self, span: "opentracing.Span") -> None: @@ -319,7 +320,11 @@ _homeserver_whitelist: Optional[Pattern[str]] = None # Util methods -Sentinel = object() + +class _Sentinel(enum.Enum): + # defining a sentinel in this way allows mypy to correctly handle the + # type of a dictionary lookup. + sentinel = object() P = ParamSpec("P") @@ -339,12 +344,12 @@ def only_if_tracing(func: Callable[P, R]) -> Callable[P, Optional[R]]: return _only_if_tracing_inner -def ensure_active_span(message, ret=None): +def ensure_active_span(message: str, ret=None): """Executes the operation only if opentracing is enabled and there is an active span. If there is no active span it logs message at the error level. Args: - message (str): Message which fills in "There was no active span when trying to %s" + message: Message which fills in "There was no active span when trying to %s" in the error log if there is no active span and opentracing is enabled. ret (object): return value if opentracing is None or there is no active span. @@ -402,7 +407,7 @@ def init_tracer(hs: "HomeServer") -> None: config = JaegerConfig( config=hs.config.tracing.jaeger_config, service_name=f"{hs.config.server.server_name} {hs.get_instance_name()}", - scope_manager=LogContextScopeManager(hs.config), + scope_manager=LogContextScopeManager(), metrics_factory=PrometheusMetricsFactory(), ) @@ -451,15 +456,15 @@ def whitelisted_homeserver(destination: str) -> bool: # Could use kwargs but I want these to be explicit def start_active_span( - operation_name, - child_of=None, - references=None, - tags=None, - start_time=None, - ignore_active_span=False, - finish_on_close=True, + operation_name: str, + child_of: Optional[Union["opentracing.Span", "opentracing.SpanContext"]] = None, + references: Optional[List["opentracing.Reference"]] = None, + tags: Optional[Dict[str, str]] = None, + start_time: Optional[float] = None, + ignore_active_span: bool = False, + finish_on_close: bool = True, *, - tracer=None, + tracer: Optional["opentracing.Tracer"] = None, ): """Starts an active opentracing span. 
@@ -493,11 +498,11 @@ def start_active_span( def start_active_span_follows_from( operation_name: str, contexts: Collection, - child_of=None, + child_of: Optional[Union["opentracing.Span", "opentracing.SpanContext"]] = None, start_time: Optional[float] = None, *, - inherit_force_tracing=False, - tracer=None, + inherit_force_tracing: bool = False, + tracer: Optional["opentracing.Tracer"] = None, ): """Starts an active opentracing span, with additional references to previous spans @@ -540,7 +545,7 @@ def start_active_span_from_edu( edu_content: Dict[str, Any], operation_name: str, references: Optional[List["opentracing.Reference"]] = None, - tags: Optional[Dict] = None, + tags: Optional[Dict[str, str]] = None, start_time: Optional[float] = None, ignore_active_span: bool = False, finish_on_close: bool = True, @@ -617,23 +622,27 @@ def set_operation_name(operation_name: str) -> None: @only_if_tracing -def force_tracing(span=Sentinel) -> None: +def force_tracing( + span: Union["opentracing.Span", _Sentinel] = _Sentinel.sentinel +) -> None: """Force sampling for the active/given span and its children. Args: span: span to force tracing for. By default, the active span. """ - if span is Sentinel: - span = opentracing.tracer.active_span - if span is None: + if isinstance(span, _Sentinel): + span_to_trace = opentracing.tracer.active_span + else: + span_to_trace = span + if span_to_trace is None: logger.error("No active span in force_tracing") return - span.set_tag(opentracing.tags.SAMPLING_PRIORITY, 1) + span_to_trace.set_tag(opentracing.tags.SAMPLING_PRIORITY, 1) # also set a bit of baggage, so that we have a way of figuring out if # it is enabled later - span.set_baggage_item(SynapseBaggage.FORCE_TRACING, "1") + span_to_trace.set_baggage_item(SynapseBaggage.FORCE_TRACING, "1") def is_context_forced_tracing( @@ -789,7 +798,7 @@ def extract_text_map(carrier: Dict[str, str]) -> Optional["opentracing.SpanConte # Tracing decorators -def trace(func=None, opname=None): +def trace(func=None, opname: Optional[str] = None): """ Decorator to trace a function. Sets the operation name to that of the function's or that given @@ -822,11 +831,11 @@ def trace(func=None, opname=None): result = func(*args, **kwargs) if isinstance(result, defer.Deferred): - def call_back(result): + def call_back(result: R) -> R: scope.__exit__(None, None, None) return result - def err_back(result): + def err_back(result: R) -> R: scope.__exit__(None, None, None) return result diff --git a/synapse/logging/scopecontextmanager.py b/synapse/logging/scopecontextmanager.py index a26a1a58e7..10877bdfc5 100644 --- a/synapse/logging/scopecontextmanager.py +++ b/synapse/logging/scopecontextmanager.py @@ -16,11 +16,15 @@ import logging from types import TracebackType from typing import Optional, Type -from opentracing import Scope, ScopeManager +from opentracing import Scope, ScopeManager, Span import twisted -from synapse.logging.context import current_context, nested_logging_context +from synapse.logging.context import ( + LoggingContext, + current_context, + nested_logging_context, +) logger = logging.getLogger(__name__) @@ -35,11 +39,11 @@ class LogContextScopeManager(ScopeManager): but currently that doesn't work due to https://twistedmatrix.com/trac/ticket/10301. """ - def __init__(self, config): + def __init__(self) -> None: pass @property - def active(self): + def active(self) -> Optional[Scope]: """ Returns the currently active Scope which can be used to access the currently active Scope.span. 
@@ -48,19 +52,18 @@ class LogContextScopeManager(ScopeManager): Tracer.start_active_span() time. Return: - (Scope) : the Scope that is active, or None if not - available. + The Scope that is active, or None if not available. """ ctx = current_context() return ctx.scope - def activate(self, span, finish_on_close): + def activate(self, span: Span, finish_on_close: bool) -> Scope: """ Makes a Span active. Args - span (Span): the span that should become active. - finish_on_close (Boolean): whether Span should be automatically - finished when Scope.close() is called. + span: the span that should become active. + finish_on_close: whether Span should be automatically finished when + Scope.close() is called. Returns: Scope to control the end of the active period for @@ -112,8 +115,8 @@ class _LogContextScope(Scope): def __init__( self, manager: LogContextScopeManager, - span, - logcontext, + span: Span, + logcontext: LoggingContext, enter_logcontext: bool, finish_on_close: bool, ): @@ -121,13 +124,13 @@ class _LogContextScope(Scope): Args: manager: the manager that is responsible for this scope. - span (Span): + span: the opentracing span which this scope represents the local lifetime for. - logcontext (LogContext): - the logcontext to which this scope is attached. + logcontext: + the log context to which this scope is attached. enter_logcontext: - if True the logcontext will be exited when the scope is finished + if True the log context will be exited when the scope is finished finish_on_close: if True finish the span when the scope is closed """ diff --git a/tests/logging/test_opentracing.py b/tests/logging/test_opentracing.py index e430941d27..40148d503c 100644 --- a/tests/logging/test_opentracing.py +++ b/tests/logging/test_opentracing.py @@ -50,7 +50,7 @@ class LogContextScopeManagerTestCase(TestCase): # global variables that power opentracing. We create our own tracer instance # and test with it. - scope_manager = LogContextScopeManager({}) + scope_manager = LogContextScopeManager() config = jaeger_client.config.Config( config={}, service_name="test", scope_manager=scope_manager ) From a3a05c812dbffd143f9a58bdf050bfac9062ba02 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 Jun 2022 15:05:49 +0100 Subject: [PATCH 047/178] Add index to help delete old push actions (#13141) --- changelog.d/13141.bugfix | 1 + synapse/_scripts/synapse_port_db.py | 6 ++---- .../databases/main/event_push_actions.py | 10 ++++++++++ .../delta/72/02event_push_actions_index.sql | 19 +++++++++++++++++++ 4 files changed, 32 insertions(+), 4 deletions(-) create mode 100644 changelog.d/13141.bugfix create mode 100644 synapse/storage/schema/main/delta/72/02event_push_actions_index.sql diff --git a/changelog.d/13141.bugfix b/changelog.d/13141.bugfix new file mode 100644 index 0000000000..930e870865 --- /dev/null +++ b/changelog.d/13141.bugfix @@ -0,0 +1 @@ +Fix DB performance when deleting old push notifications. Introduced in v1.62.0rc1. 
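Back in the opentracing changes, the `_Sentinel` enum is a small but useful typing pattern: because the sentinel value has its own class, an `isinstance` check lets mypy narrow a union, which a bare module-level `object()` sentinel cannot do. A minimal sketch of the same idea, using an invented `lookup` helper for illustration:

```python
import enum
from typing import Dict, Union


class _Missing(enum.Enum):
    # A single-member enum acts as a sentinel with a distinct, checkable type.
    sentinel = object()


def lookup(d: Dict[str, int], key: str) -> Union[int, _Missing]:
    return d.get(key, _Missing.sentinel)


value = lookup({"a": 1}, "b")
if isinstance(value, _Missing):
    print("missing")   # mypy narrows `value` to `_Missing` in this branch...
else:
    print(value + 1)   # ...and to `int` here.
```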
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 9c06c837dc..7b96f61d7b 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -58,9 +58,7 @@ from synapse.storage.databases.main.client_ips import ClientIpBackgroundUpdateSt from synapse.storage.databases.main.deviceinbox import DeviceInboxBackgroundUpdateStore from synapse.storage.databases.main.devices import DeviceBackgroundUpdateStore from synapse.storage.databases.main.end_to_end_keys import EndToEndKeyBackgroundStore -from synapse.storage.databases.main.event_push_actions import ( - EventPushActionsWorkerStore, -) +from synapse.storage.databases.main.event_push_actions import EventPushActionsStore from synapse.storage.databases.main.events_bg_updates import ( EventsBackgroundUpdatesStore, ) @@ -202,7 +200,7 @@ R = TypeVar("R") class Store( - EventPushActionsWorkerStore, + EventPushActionsStore, ClientIpBackgroundUpdateStore, DeviceInboxBackgroundUpdateStore, DeviceBackgroundUpdateStore, diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 80ca2fd0b6..3fadd0930b 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -1189,6 +1189,16 @@ class EventPushActionsStore(EventPushActionsWorkerStore): where_clause="highlight=1", ) + # Add index to make deleting old push actions faster. + self.db_pool.updates.register_background_index_update( + "event_push_actions_stream_highlight_index", + index_name="event_push_actions_stream_highlight_index", + table="event_push_actions", + columns=["highlight", "stream_ordering"], + where_clause="highlight=0", + psql_only=True, + ) + async def get_push_actions_for_user( self, user_id: str, diff --git a/synapse/storage/schema/main/delta/72/02event_push_actions_index.sql b/synapse/storage/schema/main/delta/72/02event_push_actions_index.sql new file mode 100644 index 0000000000..cd0c11d951 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/02event_push_actions_index.sql @@ -0,0 +1,19 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Add an index to `event_push_actions` to make deleting old non-highlight push +-- actions faster. 
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (7202, 'event_push_actions_stream_highlight_index', '{}'); From dbce28b2f14e5a4d0db9f090caf390ea998d1e59 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 Jun 2022 15:08:40 +0100 Subject: [PATCH 048/178] Fix unread counts on large servers (#13140) --- changelog.d/13140.bugfix | 1 + .../databases/main/event_push_actions.py | 48 ++++++++++--------- tests/storage/test_event_push_actions.py | 12 ++--- 3 files changed, 32 insertions(+), 29 deletions(-) create mode 100644 changelog.d/13140.bugfix diff --git a/changelog.d/13140.bugfix b/changelog.d/13140.bugfix new file mode 100644 index 0000000000..cb0586e39e --- /dev/null +++ b/changelog.d/13140.bugfix @@ -0,0 +1 @@ +Fix unread counts for users on large servers. Introduced in v1.62.0rc1. diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 3fadd0930b..7d4754b3d3 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -854,18 +854,20 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas limit = 100 - min_stream_id = self.db_pool.simple_select_one_onecol_txn( + min_receipts_stream_id = self.db_pool.simple_select_one_onecol_txn( txn, table="event_push_summary_last_receipt_stream_id", keyvalues={}, retcol="stream_id", ) + max_receipts_stream_id = self._receipts_id_gen.get_current_token() + sql = """ SELECT r.stream_id, r.room_id, r.user_id, e.stream_ordering FROM receipts_linearized AS r INNER JOIN events AS e USING (event_id) - WHERE r.stream_id > ? AND user_id LIKE ? + WHERE ? < r.stream_id AND r.stream_id <= ? AND user_id LIKE ? ORDER BY r.stream_id ASC LIMIT ? """ @@ -877,13 +879,21 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas txn.execute( sql, ( - min_stream_id, + min_receipts_stream_id, + max_receipts_stream_id, user_filter, limit, ), ) rows = txn.fetchall() + old_rotate_stream_ordering = self.db_pool.simple_select_one_onecol_txn( + txn, + table="event_push_summary_stream_ordering", + keyvalues={}, + retcol="stream_ordering", + ) + # For each new read receipt we delete push actions from before it and # recalculate the summary. for _, room_id, user_id, stream_ordering in rows: @@ -902,13 +912,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas (room_id, user_id, stream_ordering), ) - old_rotate_stream_ordering = self.db_pool.simple_select_one_onecol_txn( - txn, - table="event_push_summary_stream_ordering", - keyvalues={}, - retcol="stream_ordering", - ) - notif_count, unread_count = self._get_notif_unread_count_for_user_room( txn, room_id, user_id, stream_ordering, old_rotate_stream_ordering ) @@ -927,18 +930,19 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas # We always update `event_push_summary_last_receipt_stream_id` to # ensure that we don't rescan the same receipts for remote users. - # - # This requires repeatable read to be safe, as we need the - # `MAX(stream_id)` to not include any new rows that have been committed - # since the start of the transaction (since those rows won't have been - # returned by the query above). Alternatively we could query the max - # stream ID at the start of the transaction and bound everything by - # that. 
- txn.execute( - """ - UPDATE event_push_summary_last_receipt_stream_id - SET stream_id = (SELECT COALESCE(MAX(stream_id), 0) FROM receipts_linearized) - """ + + upper_limit = max_receipts_stream_id + if len(rows) >= limit: + # If we pulled out a limited number of rows we only update the + # position to the last receipt we processed, so we continue + # processing the rest next iteration. + upper_limit = rows[-1][0] + + self.db_pool.simple_update_txn( + txn, + table="event_push_summary_last_receipt_stream_id", + keyvalues={}, + updatevalues={"stream_id": upper_limit}, ) return len(rows) < limit diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index ef069a8110..684485ae06 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -134,15 +134,12 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): last_read_stream_ordering[0] = stream self.get_success( - self.store.db_pool.runInteraction( - "", - self.store._insert_linearized_receipt_txn, + self.store.insert_receipt( room_id, "m.read", - user_id, - f"$test{stream}:example.com", - {}, - stream, + user_id=user_id, + event_ids=[f"$test{stream}:example.com"], + data={}, ) ) @@ -166,6 +163,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): _inject_actions(6, PlAIN_NOTIF) _rotate(7) + _assert_counts(1, 0) self.get_success( self.store.db_pool.simple_delete( From 0ceb3af10b88f9f195fd42db12d33dafda8e6261 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 30 Jun 2022 15:59:11 +0100 Subject: [PATCH 049/178] Add a link to the configuration manual from the homeserver sample config documentation page (#13139) --- changelog.d/13139.doc | 1 + docs/usage/configuration/homeserver_sample_config.md | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 changelog.d/13139.doc diff --git a/changelog.d/13139.doc b/changelog.d/13139.doc new file mode 100644 index 0000000000..f5d99d461a --- /dev/null +++ b/changelog.d/13139.doc @@ -0,0 +1 @@ +Add a link to the configuration manual from the homeserver sample config documentation. diff --git a/docs/usage/configuration/homeserver_sample_config.md b/docs/usage/configuration/homeserver_sample_config.md index 11e806998d..2dbfb35baa 100644 --- a/docs/usage/configuration/homeserver_sample_config.md +++ b/docs/usage/configuration/homeserver_sample_config.md @@ -9,6 +9,9 @@ a real homeserver.yaml. Instead, if you are starting from scratch, please genera a fresh config using Synapse by following the instructions in [Installation](../../setup/installation.md). +Documentation for all configuration options can be found in the +[Configuration Manual](./config_documentation.md). 
+ ```yaml {{#include ../../sample_config.yaml}} ``` From 8330fc9953032f21eb4c7d5f0627c1e6aba2459c Mon Sep 17 00:00:00 2001 From: Shay Date: Thu, 30 Jun 2022 09:21:39 -0700 Subject: [PATCH 050/178] Cleanup references to sample config in the docs and redirect users to configuration manual (#13077) --- changelog.d/13077.doc | 3 + docs/admin_api/user_admin_api.md | 5 +- docs/code_style.md | 93 +++++++------------ docs/jwt.md | 5 +- docs/manhole.md | 6 +- docs/message_retention_policies.md | 18 ++-- docs/openid.md | 4 +- docs/setup/forward_proxy.md | 4 +- docs/setup/installation.md | 14 +-- .../configuration/config_documentation.md | 2 +- .../user_authentication/single_sign_on/cas.md | 4 +- synapse/config/emailconfig.py | 2 +- 12 files changed, 72 insertions(+), 88 deletions(-) create mode 100644 changelog.d/13077.doc diff --git a/changelog.d/13077.doc b/changelog.d/13077.doc new file mode 100644 index 0000000000..502f2d059e --- /dev/null +++ b/changelog.d/13077.doc @@ -0,0 +1,3 @@ +Clean up references to sample configuration and redirect users to the configuration manual instead. + + diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md index 62f89e8cba..1235f1cb95 100644 --- a/docs/admin_api/user_admin_api.md +++ b/docs/admin_api/user_admin_api.md @@ -124,9 +124,8 @@ Body parameters: - `address` - string. Value of third-party ID. belonging to a user. - `external_ids` - array, optional. Allow setting the identifier of the external identity - provider for SSO (Single sign-on). Details in - [Sample Configuration File](../usage/configuration/homeserver_sample_config.html) - section `sso` and `oidc_providers`. + provider for SSO (Single sign-on). Details in the configuration manual under the + sections [sso](../usage/configuration/config_documentation.md#sso) and [oidc_providers](../usage/configuration/config_documentation.md#oidc_providers). - `auth_provider` - string. ID of the external identity provider. Value of `idp_id` in the homeserver configuration. Note that no error is raised if the provided value is not in the homeserver configuration. diff --git a/docs/code_style.md b/docs/code_style.md index db7edcd76b..d65fda62d1 100644 --- a/docs/code_style.md +++ b/docs/code_style.md @@ -70,82 +70,61 @@ on save as they take a while and can be very resource intensive. - Avoid wildcard imports (`from synapse.types import *`) and relative imports (`from .types import UserID`). -## Configuration file format +## Configuration code and documentation format -The [sample configuration file](./sample_config.yaml) acts as a +When adding a configuration option to the code, if several settings are grouped into a single dict, ensure that your code +correctly handles the top-level option being set to `None` (as it will be if no sub-options are enabled). + +The [configuration manual](usage/configuration/config_documentation.md) acts as a reference to Synapse's configuration options for server administrators. Remember that many readers will be unfamiliar with YAML and server -administration in general, so that it is important that the file be as -easy to understand as possible, which includes following a consistent -format. +administration in general, so it is important that when you add +a configuration option the documentation be as easy to understand as possible, which +includes following a consistent format. Some guidelines follow: -- Sections should be separated with a heading consisting of a single - line prefixed and suffixed with `##`. 
There should be **two** blank - lines before the section header, and **one** after. -- Each option should be listed in the file with the following format: - - A comment describing the setting. Each line of this comment - should be prefixed with a hash (`#`) and a space. +- Each option should be listed in the config manual with the following format: + + - The name of the option, prefixed by `###`. - The comment should describe the default behaviour (ie, what + - A comment which describes the default behaviour (i.e. what happens if the setting is omitted), as well as what the effect will be if the setting is changed. - - Often, the comment end with something like "uncomment the - following to ". - - - A line consisting of only `#`. - - A commented-out example setting, prefixed with only `#`. + - An example setting, using backticks to define the code block For boolean (on/off) options, convention is that this example - should be the *opposite* to the default (so the comment will end - with "Uncomment the following to enable [or disable] - ." For other options, the example should give some - non-default value which is likely to be useful to the reader. + should be the *opposite* to the default. For other options, the example should give + some non-default value which is likely to be useful to the reader. -- There should be a blank line between each option. -- Where several settings are grouped into a single dict, *avoid* the - convention where the whole block is commented out, resulting in - comment lines starting `# #`, as this is hard to read and confusing - to edit. Instead, leave the top-level config option uncommented, and - follow the conventions above for sub-options. Ensure that your code - correctly handles the top-level option being set to `None` (as it - will be if no sub-options are enabled). -- Lines should be wrapped at 80 characters. -- Use two-space indents. -- `true` and `false` are spelt thus (as opposed to `True`, etc.) -- Use single quotes (`'`) rather than double-quotes (`"`) or backticks - (`` ` ``) to refer to configuration options. +- There should be a horizontal rule between each option, which can be achieved by adding `---` before and + after the option. +- `true` and `false` are spelt thus (as opposed to `True`, etc.) Example: +--- +### `modules` + +Use the `module` sub-option to add a module under `modules` to extend functionality. +The `module` setting then has a sub-option, `config`, which can be used to define some configuration +for the `module`. + +Defaults to none. + +Example configuration: ```yaml -## Frobnication ## - -# The frobnicator will ensure that all requests are fully frobnicated. -# To enable it, uncomment the following. -# -#frobnicator_enabled: true - -# By default, the frobnicator will frobnicate with the default frobber. -# The following will make it use an alternative frobber. -# -#frobincator_frobber: special_frobber - -# Settings for the frobber -# -frobber: - # frobbing speed. Defaults to 1. - # - #speed: 10 - - # frobbing distance. Defaults to 1000. - # - #distance: 100 +modules: + - module: my_super_module.MySuperClass + config: + do_thing: true + - module: my_other_super_module.SomeClass + config: {} ``` +--- Note that the sample configuration is generated from the synapse code and is maintained by a script, `scripts-dev/generate_sample_config.sh`. Making sure that the output from this script matches the desired format is left as an exercise for the reader! 
+ diff --git a/docs/jwt.md b/docs/jwt.md index 8f859d59a6..2e262583a7 100644 --- a/docs/jwt.md +++ b/docs/jwt.md @@ -49,9 +49,8 @@ as follows: * For other installation mechanisms, see the documentation provided by the maintainer. -To enable the JSON web token integration, you should then add a `jwt_config` section -to your configuration file (or uncomment the `enabled: true` line in the -existing section). See [sample_config.yaml](./sample_config.yaml) for some +To enable the JSON web token integration, you should then add a `jwt_config` option +to your configuration file. See the [configuration manual](usage/configuration/config_documentation.md#jwt_config) for some sample settings. ## How to test JWT as a developer diff --git a/docs/manhole.md b/docs/manhole.md index a82fad0f0f..4e5bf833ce 100644 --- a/docs/manhole.md +++ b/docs/manhole.md @@ -13,8 +13,10 @@ environments where untrusted users have shell access. ## Configuring the manhole -To enable it, first uncomment the `manhole` listener configuration in -`homeserver.yaml`. The configuration is slightly different if you're using docker. +To enable it, first add the `manhole` listener configuration in your +`homeserver.yaml`. You can find information on how to do that +in the [configuration manual](usage/configuration/config_documentation.md#manhole_settings). +The configuration is slightly different if you're using docker. #### Docker config diff --git a/docs/message_retention_policies.md b/docs/message_retention_policies.md index b52c4aaa24..8c88f93935 100644 --- a/docs/message_retention_policies.md +++ b/docs/message_retention_policies.md @@ -49,9 +49,9 @@ clients. ## Server configuration -Support for this feature can be enabled and configured in the -`retention` section of the Synapse configuration file (see the -[sample file](https://github.com/matrix-org/synapse/blob/v1.36.0/docs/sample_config.yaml#L451-L518)). +Support for this feature can be enabled and configured by adding a the +`retention` in the Synapse configuration file (see +[configuration manual](usage/configuration/config_documentation.md#retention)). To enable support for message retention policies, set the setting `enabled` in this section to `true`. @@ -65,8 +65,8 @@ message retention policy configured in its state. This allows server admins to ensure that messages are never kept indefinitely in a server's database. -A default policy can be defined as such, in the `retention` section of -the configuration file: +A default policy can be defined as such, by adding the `retention` option in +the configuration file and adding these sub-options: ```yaml default_policy: @@ -86,8 +86,8 @@ Purge jobs are the jobs that Synapse runs in the background to purge expired events from the database. They are only run if support for message retention policies is enabled in the server's configuration. If no configuration for purge jobs is configured by the server admin, -Synapse will use a default configuration, which is described in the -[sample configuration file](https://github.com/matrix-org/synapse/blob/v1.36.0/docs/sample_config.yaml#L451-L518). +Synapse will use a default configuration, which is described here in the +[configuration manual](usage/configuration/config_documentation.md#retention). Some server admins might want a finer control on when events are removed depending on an event's room's policy. This can be done by setting the @@ -137,8 +137,8 @@ the server's database. 
### Lifetime limits Server admins can set limits on the values of `max_lifetime` to use when -purging old events in a room. These limits can be defined as such in the -`retention` section of the configuration file: +purging old events in a room. These limits can be defined under the +`retention` option in the configuration file: ```yaml allowed_lifetime_min: 1d diff --git a/docs/openid.md b/docs/openid.md index 9d615a5737..d0ccf36f71 100644 --- a/docs/openid.md +++ b/docs/openid.md @@ -45,8 +45,8 @@ as follows: maintainer. To enable the OpenID integration, you should then add a section to the `oidc_providers` -setting in your configuration file (or uncomment one of the existing examples). -See [sample_config.yaml](./sample_config.yaml) for some sample settings, as well as +setting in your configuration file. +See the [configuration manual](usage/configuration/config_documentation.md#oidc_providers) for some sample settings, as well as the text below for example configurations for specific providers. ## Sample configs diff --git a/docs/setup/forward_proxy.md b/docs/setup/forward_proxy.md index 494c14893b..3482691f83 100644 --- a/docs/setup/forward_proxy.md +++ b/docs/setup/forward_proxy.md @@ -66,8 +66,8 @@ in Synapse can be deactivated. **NOTE**: This has an impact on security and is for testing purposes only! -To deactivate the certificate validation, the following setting must be made in -[homserver.yaml](../usage/configuration/homeserver_sample_config.md). +To deactivate the certificate validation, the following setting must be added to +your [homserver.yaml](../usage/configuration/homeserver_sample_config.md). ```yaml use_insecure_ssl_client_just_for_testing_do_not_use: true diff --git a/docs/setup/installation.md b/docs/setup/installation.md index 1580529fd1..260e50577b 100644 --- a/docs/setup/installation.md +++ b/docs/setup/installation.md @@ -407,11 +407,11 @@ The recommended way to do so is to set up a reverse proxy on port Alternatively, you can configure Synapse to expose an HTTPS port. To do so, you will need to edit `homeserver.yaml`, as follows: -- First, under the `listeners` section, uncomment the configuration for the - TLS-enabled listener. (Remove the hash sign (`#`) at the start of - each line). The relevant lines are like this: +- First, under the `listeners` option, add the configuration for the + TLS-enabled listener like so: ```yaml +listeners: - port: 8448 type: http tls: true @@ -419,9 +419,11 @@ so, you will need to edit `homeserver.yaml`, as follows: - names: [client, federation] ``` -- You will also need to uncomment the `tls_certificate_path` and - `tls_private_key_path` lines under the `TLS` section. You will need to manage - provisioning of these certificates yourself. +- You will also need to add the options `tls_certificate_path` and + `tls_private_key_path`. to your configuration file. You will need to manage provisioning of + these certificates yourself. +- You can find more information about these options as well as how to configure synapse in the + [configuration manual](../usage/configuration/config_documentation.md). 
If you are using your own certificate, be sure to use a `.pem` file that includes the full certificate chain including any intermediate certificates diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 19eb504496..82edd53e36 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -2999,7 +2999,7 @@ This setting has the following sub-options: * `localdb_enabled`: Set to false to disable authentication against the local password database. This is ignored if `enabled` is false, and is only useful if you have other `password_providers`. Defaults to true. -* `pepper`: Set the value here to a secret random string for extra security. # Uncomment and change to a secret random string for extra security. +* `pepper`: Set the value here to a secret random string for extra security. DO NOT CHANGE THIS AFTER INITIAL SETUP! * `policy`: Define and enforce a password policy, such as minimum lengths for passwords, etc. Each parameter is optional. This is an implementation of MSC2000. Parameters are as follows: diff --git a/docs/usage/configuration/user_authentication/single_sign_on/cas.md b/docs/usage/configuration/user_authentication/single_sign_on/cas.md index 3bac1b29f0..899face876 100644 --- a/docs/usage/configuration/user_authentication/single_sign_on/cas.md +++ b/docs/usage/configuration/user_authentication/single_sign_on/cas.md @@ -4,5 +4,5 @@ Synapse supports authenticating users via the [Central Authentication Service protocol](https://en.wikipedia.org/wiki/Central_Authentication_Service) (CAS) natively. -Please see the `cas_config` and `sso` sections of the [Synapse configuration -file](../../../configuration/homeserver_sample_config.md) for more details. \ No newline at end of file +Please see the [cas_config](../../../configuration/config_documentation.md#cas_config) and [sso](../../../configuration/config_documentation.md#sso) +sections of the configuration manual for more details. \ No newline at end of file diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py index c82f3ee7a3..6e11fbdb9a 100644 --- a/synapse/config/emailconfig.py +++ b/synapse/config/emailconfig.py @@ -145,7 +145,7 @@ class EmailConfig(Config): raise ConfigError( 'The config option "trust_identity_server_for_password_resets" ' 'has been replaced by "account_threepid_delegate". ' - "Please consult the sample config at docs/sample_config.yaml for " + "Please consult the configuration manual at docs/usage/configuration/config_documentation.md for " "details and update your config file." ) From 046a6513bcad2f7111e12e3b750eb798466731da Mon Sep 17 00:00:00 2001 From: Shay Date: Thu, 30 Jun 2022 09:22:40 -0700 Subject: [PATCH 051/178] Don't process /send requests for users who have hit their ratelimit (#13134) --- changelog.d/13134.misc | 1 + synapse/handlers/message.py | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 changelog.d/13134.misc diff --git a/changelog.d/13134.misc b/changelog.d/13134.misc new file mode 100644 index 0000000000..e3e16056d1 --- /dev/null +++ b/changelog.d/13134.misc @@ -0,0 +1 @@ +Apply ratelimiting earlier in processing of /send request. 
\ No newline at end of file diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 189f52fe5a..c6b40a5b7a 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -903,6 +903,9 @@ class EventCreationHandler: await self.clock.sleep(random.randint(1, 10)) raise ShadowBanError() + if ratelimit: + await self.request_ratelimiter.ratelimit(requester, update=False) + # We limit the number of concurrent event sends in a room so that we # don't fork the DAG too much. If we don't limit then we can end up in # a situation where event persistence can't keep up, causing From 50f0e4028b334566a067b671d15246a9b05e8498 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacek=20Ku=C5=9Bnierz?= Date: Thu, 30 Jun 2022 19:48:04 +0200 Subject: [PATCH 052/178] Allow dependency errors to pass through (#13113) Signed-off-by: Jacek Kusnierz Co-authored-by: Brendan Abolivier --- changelog.d/13113.misc | 1 + synapse/config/cache.py | 9 ++------- synapse/config/jwt.py | 17 +++-------------- synapse/config/metrics.py | 9 ++------- synapse/config/oidc.py | 10 ++-------- synapse/config/repository.py | 10 ++-------- synapse/config/saml2.py | 9 ++------- synapse/config/tracer.py | 9 ++------- 8 files changed, 16 insertions(+), 58 deletions(-) create mode 100644 changelog.d/13113.misc diff --git a/changelog.d/13113.misc b/changelog.d/13113.misc new file mode 100644 index 0000000000..7b1a50eec0 --- /dev/null +++ b/changelog.d/13113.misc @@ -0,0 +1 @@ +Raise a `DependencyError` on missing dependencies instead of a `ConfigError`. \ No newline at end of file diff --git a/synapse/config/cache.py b/synapse/config/cache.py index 63310c8d07..2db8cfb005 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -21,7 +21,7 @@ from typing import Any, Callable, Dict, Optional import attr from synapse.types import JsonDict -from synapse.util.check_dependencies import DependencyException, check_requirements +from synapse.util.check_dependencies import check_requirements from ._base import Config, ConfigError @@ -159,12 +159,7 @@ class CacheConfig(Config): self.track_memory_usage = cache_config.get("track_memory_usage", False) if self.track_memory_usage: - try: - check_requirements("cache_memory") - except DependencyException as e: - raise ConfigError( - e.message # noqa: B306, DependencyException.message is a property - ) + check_requirements("cache_memory") expire_caches = cache_config.get("expire_caches", True) cache_entry_ttl = cache_config.get("cache_entry_ttl", "30m") diff --git a/synapse/config/jwt.py b/synapse/config/jwt.py index 49aaca7cf6..a973bb5080 100644 --- a/synapse/config/jwt.py +++ b/synapse/config/jwt.py @@ -15,14 +15,9 @@ from typing import Any from synapse.types import JsonDict +from synapse.util.check_dependencies import check_requirements -from ._base import Config, ConfigError - -MISSING_AUTHLIB = """Missing authlib library. This is required for jwt login. - - Install by running: - pip install synapse[jwt] - """ +from ._base import Config class JWTConfig(Config): @@ -41,13 +36,7 @@ class JWTConfig(Config): # that the claims exist on the JWT. self.jwt_issuer = jwt_config.get("issuer") self.jwt_audiences = jwt_config.get("audiences") - - try: - from authlib.jose import JsonWebToken - - JsonWebToken # To stop unused lint. 
- except ImportError: - raise ConfigError(MISSING_AUTHLIB) + check_requirements("jwt") else: self.jwt_enabled = False self.jwt_secret = None diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index d636507886..3b42be5b5b 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -18,7 +18,7 @@ from typing import Any, Optional import attr from synapse.types import JsonDict -from synapse.util.check_dependencies import DependencyException, check_requirements +from synapse.util.check_dependencies import check_requirements from ._base import Config, ConfigError @@ -57,12 +57,7 @@ class MetricsConfig(Config): self.sentry_enabled = "sentry" in config if self.sentry_enabled: - try: - check_requirements("sentry") - except DependencyException as e: - raise ConfigError( - e.message # noqa: B306, DependencyException.message is a property - ) + check_requirements("sentry") self.sentry_dsn = config["sentry"].get("dsn") if not self.sentry_dsn: diff --git a/synapse/config/oidc.py b/synapse/config/oidc.py index 98e8cd8b5a..5418a332da 100644 --- a/synapse/config/oidc.py +++ b/synapse/config/oidc.py @@ -24,7 +24,7 @@ from synapse.types import JsonDict from synapse.util.module_loader import load_module from synapse.util.stringutils import parse_and_validate_mxc_uri -from ..util.check_dependencies import DependencyException, check_requirements +from ..util.check_dependencies import check_requirements from ._base import Config, ConfigError, read_file DEFAULT_USER_MAPPING_PROVIDER = "synapse.handlers.oidc.JinjaOidcMappingProvider" @@ -41,12 +41,7 @@ class OIDCConfig(Config): if not self.oidc_providers: return - try: - check_requirements("oidc") - except DependencyException as e: - raise ConfigError( - e.message # noqa: B306, DependencyException.message is a property - ) from e + check_requirements("oidc") # check we don't have any duplicate idp_ids now. (The SSO handler will also # check for duplicates when the REST listeners get registered, but that happens @@ -146,7 +141,6 @@ OIDC_PROVIDER_CONFIG_WITH_ID_SCHEMA = { "allOf": [OIDC_PROVIDER_CONFIG_SCHEMA, {"required": ["idp_id", "idp_name"]}] } - # the `oidc_providers` list can either be None (as it is in the default config), or # a list of provider configs, each of which requires an explicit ID and name. 
OIDC_PROVIDER_LIST_SCHEMA = { diff --git a/synapse/config/repository.py b/synapse/config/repository.py index aadec1e54e..3c69dd325f 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -21,7 +21,7 @@ import attr from synapse.config.server import generate_ip_set from synapse.types import JsonDict -from synapse.util.check_dependencies import DependencyException, check_requirements +from synapse.util.check_dependencies import check_requirements from synapse.util.module_loader import load_module from ._base import Config, ConfigError @@ -184,13 +184,7 @@ class ContentRepositoryConfig(Config): ) self.url_preview_enabled = config.get("url_preview_enabled", False) if self.url_preview_enabled: - try: - check_requirements("url_preview") - - except DependencyException as e: - raise ConfigError( - e.message # noqa: B306, DependencyException.message is a property - ) + check_requirements("url_preview") proxy_env = getproxies_environment() if "url_preview_ip_range_blacklist" not in config: diff --git a/synapse/config/saml2.py b/synapse/config/saml2.py index bd7c234d31..49ca663dde 100644 --- a/synapse/config/saml2.py +++ b/synapse/config/saml2.py @@ -18,7 +18,7 @@ from typing import Any, List, Set from synapse.config.sso import SsoAttributeRequirement from synapse.types import JsonDict -from synapse.util.check_dependencies import DependencyException, check_requirements +from synapse.util.check_dependencies import check_requirements from synapse.util.module_loader import load_module, load_python_module from ._base import Config, ConfigError @@ -76,12 +76,7 @@ class SAML2Config(Config): if not saml2_config.get("sp_config") and not saml2_config.get("config_path"): return - try: - check_requirements("saml2") - except DependencyException as e: - raise ConfigError( - e.message # noqa: B306, DependencyException.message is a property - ) + check_requirements("saml2") self.saml2_enabled = True diff --git a/synapse/config/tracer.py b/synapse/config/tracer.py index 6fbf927f11..c19270c6c5 100644 --- a/synapse/config/tracer.py +++ b/synapse/config/tracer.py @@ -15,7 +15,7 @@ from typing import Any, List, Set from synapse.types import JsonDict -from synapse.util.check_dependencies import DependencyException, check_requirements +from synapse.util.check_dependencies import check_requirements from ._base import Config, ConfigError @@ -40,12 +40,7 @@ class TracerConfig(Config): if not self.opentracer_enabled: return - try: - check_requirements("opentracing") - except DependencyException as e: - raise ConfigError( - e.message # noqa: B306, DependencyException.message is a property - ) + check_requirements("opentracing") # The tracer is enabled so sanitize the config From c0efc689cb925ff42e5617e7cddba11f18ab22de Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 30 Jun 2022 22:12:28 +0100 Subject: [PATCH 053/178] Add documentation for phone home stats (#13086) --- changelog.d/13086.doc | 1 + docs/SUMMARY.md | 1 + .../reporting_anonymised_statistics.md | 81 +++++++++++++++++++ 3 files changed, 83 insertions(+) create mode 100644 changelog.d/13086.doc create mode 100644 docs/usage/administration/monitoring/reporting_anonymised_statistics.md diff --git a/changelog.d/13086.doc b/changelog.d/13086.doc new file mode 100644 index 0000000000..a3960ca325 --- /dev/null +++ b/changelog.d/13086.doc @@ -0,0 +1 @@ +Add documentation for anonymised homeserver statistics collection. 
\ No newline at end of file diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index b51c7a3cb4..3978f96fc3 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -69,6 +69,7 @@ - [Federation](usage/administration/admin_api/federation.md) - [Manhole](manhole.md) - [Monitoring](metrics-howto.md) + - [Reporting Anonymised Statistics](usage/administration/monitoring/reporting_anonymised_statistics.md) - [Understanding Synapse Through Grafana Graphs](usage/administration/understanding_synapse_through_grafana_graphs.md) - [Useful SQL for Admins](usage/administration/useful_sql_for_admins.md) - [Database Maintenance Tools](usage/administration/database_maintenance_tools.md) diff --git a/docs/usage/administration/monitoring/reporting_anonymised_statistics.md b/docs/usage/administration/monitoring/reporting_anonymised_statistics.md new file mode 100644 index 0000000000..4f1e0fecf5 --- /dev/null +++ b/docs/usage/administration/monitoring/reporting_anonymised_statistics.md @@ -0,0 +1,81 @@ +# Reporting Anonymised Statistics + +When generating your Synapse configuration file, you are asked whether you +would like to report anonymised statistics to Matrix.org. These statistics +provide the foundation a glimpse into the number of Synapse homeservers +participating in the network, as well as statistics such as the number of +rooms being created and messages being sent. This feature is sometimes +affectionately called "phone-home" stats. Reporting +[is optional](../../configuration/config_documentation.md#report_stats) +and the reporting endpoint +[can be configured](../../configuration/config_documentation.md#report_stats_endpoint), +in case you would like to instead report statistics from a set of homeservers +to your own infrastructure. + +This documentation aims to define the statistics available and the +homeserver configuration options that exist to tweak it. + +## Available Statistics + +The following statistics are sent to the configured reporting endpoint: + +| Statistic Name | Type | Description | +|----------------------------|--------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `memory_rss` | int | The memory usage of the process (in kilobytes on Unix-based systems, bytes on MacOS). | +| `cpu_average` | int | CPU time in % of a single core (not % of all cores). | +| `homeserver` | string | The homeserver's server name. | +| `server_context` | string | An arbitrary string used to group statistics from a set of homeservers. | +| `timestamp` | int | The current time, represented as the number of seconds since the epoch. | +| `uptime_seconds` | int | The number of seconds since the homeserver was last started. | +| `python_version` | string | The Python version number in use (e.g "3.7.1"). Taken from `sys.version_info`. | +| `total_users` | int | The number of registered users on the homeserver. | +| `total_nonbridged_users` | int | The number of users, excluding those created by an Application Service. | +| `daily_user_type_native` | int | The number of native users created in the last 24 hours. | +| `daily_user_type_guest` | int | The number of guest users created in the last 24 hours. | +| `daily_user_type_bridged` | int | The number of users created by Application Services in the last 24 hours. 
| +| `total_room_count` | int | The total number of rooms present on the homeserver. | +| `daily_active_users` | int | The number of unique users[^1] that have used the homeserver in the last 24 hours. | +| `monthly_active_users` | int | The number of unique users[^1] that have used the homeserver in the last 30 days. | +| `daily_active_rooms` | int | The number of rooms that have had a (state) event with the type `m.room.message` sent in them in the last 24 hours. | +| `daily_active_e2ee_rooms` | int | The number of rooms that have had a (state) event with the type `m.room.encrypted` sent in them in the last 24 hours. | +| `daily_messages` | int | The number of (state) events with the type `m.room.message` seen in the last 24 hours. | +| `daily_e2ee_messages` | int | The number of (state) events with the type `m.room.encrypted` seen in the last 24 hours. | +| `daily_sent_messages` | int | The number of (state) events sent by a local user with the type `m.room.message` seen in the last 24 hours. | +| `daily_sent_e2ee_messages` | int | The number of (state) events sent by a local user with the type `m.room.encrypted` seen in the last 24 hours. | +| `r30_users_all` | int | The number of 30 day retained users, defined as users who have created their accounts more than 30 days ago, where they were last seen at most 30 days ago and where those two timestamps are over 30 days apart. Includes clients that do not fit into the below r30 client types. | +| `r30_users_android` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "Android" in the user agent string. | +| `r30_users_ios` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "iOS" in the user agent string. | +| `r30_users_electron` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "Electron" in the user agent string. | +| `r30_users_web` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "Mozilla" or "Gecko" in the user agent string. | +| `r30v2_users_all` | int | The number of 30 day retained users, with a revised algorithm. Defined as users that appear more than once in the past 60 days, and have more than 30 days between the most and least recent appearances in the past 60 days. Includes clients that do not fit into the below r30 client types. | +| `r30v2_users_android` | int | The number of 30 day retained users, as defined above. Filtered only to clients with ("riot" or "element") and "android" (case-insensitive) in the user agent string. | +| `r30v2_users_ios` | int | The number of 30 day retained users, as defined above. Filtered only to clients with ("riot" or "element") and "ios" (case-insensitive) in the user agent string. | +| `r30v2_users_electron` | int | The number of 30 day retained users, as defined above. Filtered only to clients with ("riot" or "element") and "electron" (case-insensitive) in the user agent string. | +| `r30v2_users_web` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "mozilla" or "gecko" (case-insensitive) in the user agent string. | +| `cache_factor` | int | The configured [`global factor`](../../configuration/config_documentation.md#caching) value for caching. | +| `event_cache_size` | int | The configured [`event_cache_size`](../../configuration/config_documentation.md#caching) value for caching. | +| `database_engine` | string | The database engine that is in use. 
Either "psycopg2" meaning PostgreSQL is in use, or "sqlite3" for SQLite3. | +| `database_server_version` | string | The version of the database server. Examples being "10.10" for PostgreSQL server version 10.0, and "3.38.5" for SQLite 3.38.5 installed on the system. | +| `log_level` | string | The log level in use. Examples are "INFO", "WARNING", "ERROR", "DEBUG", etc. | + + +[^1]: Native matrix users and guests are always counted. If the +[`track_puppeted_user_ips`](../../configuration/config_documentation.md#track_puppeted_user_ips) +option is set to `true`, "puppeted" users (users that an Application Service have performed +[an action on behalf of](https://spec.matrix.org/v1.3/application-service-api/#identity-assertion)) +will also be counted. Note that an Application Service can "puppet" any user in their +[user namespace](https://spec.matrix.org/v1.3/application-service-api/#registration), +not only users that the Application Service has created. If this happens, the Application Service +will additionally be counted as a user (irrespective of `track_puppeted_user_ips`). + +## Using a Custom Statistics Collection Server + +If statistics reporting is enabled, the endpoint that Synapse sends metrics to is configured by the +[`report_stats_endpoint`](../../configuration/config_documentation.md#report_stats_endpoint) config +option. By default, statistics are sent to Matrix.org. + +If you would like to set up your own statistics collection server and send metrics there, you may +consider using one of the following known implementations: + +* [Matrix.org's Panopticon](https://github.com/matrix-org/panopticon) +* [Famedly's Barad-dûr](https://gitlab.com/famedly/company/devops/services/barad-dur) From 8c2825276fec6e03434f1924482788ea3281a9fc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 1 Jul 2022 10:19:27 +0100 Subject: [PATCH 054/178] Skip waiting for full state for incoming events (#13144) When we receive an event over federation during a faster join, there is no need to wait for full state, since we have a whole reconciliation process designed to take the partial state into account. --- changelog.d/13144.misc | 1 + synapse/state/__init__.py | 12 +++++++++--- tests/test_state.py | 4 +++- 3 files changed, 13 insertions(+), 4 deletions(-) create mode 100644 changelog.d/13144.misc diff --git a/changelog.d/13144.misc b/changelog.d/13144.misc new file mode 100644 index 0000000000..34762e2fcd --- /dev/null +++ b/changelog.d/13144.misc @@ -0,0 +1 @@ +Faster joins: skip waiting for full state when processing incoming events over federation. diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 9d3fe66100..d5cbdb3eef 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -249,8 +249,12 @@ class StateHandler: partial_state = True logger.debug("calling resolve_state_groups from compute_event_context") + # we've already taken into account partial state, so no need to wait for + # complete state here. 
entry = await self.resolve_state_groups_for_events( - event.room_id, event.prev_event_ids() + event.room_id, + event.prev_event_ids(), + await_full_state=False, ) state_ids_before_event = entry.state @@ -335,7 +339,7 @@ class StateHandler: @measure_func() async def resolve_state_groups_for_events( - self, room_id: str, event_ids: Collection[str] + self, room_id: str, event_ids: Collection[str], await_full_state: bool = True ) -> _StateCacheEntry: """Given a list of event_ids this method fetches the state at each event, resolves conflicts between them and returns them. @@ -343,6 +347,8 @@ class StateHandler: Args: room_id event_ids + await_full_state: if true, will block if we do not yet have complete + state at these events. Returns: The resolved state @@ -350,7 +356,7 @@ class StateHandler: logger.debug("resolve_state_groups event_ids %s", event_ids) state_groups = await self._state_storage_controller.get_state_group_for_events( - event_ids + event_ids, await_full_state=await_full_state ) state_group_ids = state_groups.values() diff --git a/tests/test_state.py b/tests/test_state.py index b005dd8d0f..7b3f52f68e 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -131,7 +131,9 @@ class _DummyStore: async def get_room_version_id(self, room_id): return RoomVersions.V1.identifier - async def get_state_group_for_events(self, event_ids): + async def get_state_group_for_events( + self, event_ids, await_full_state: bool = True + ): res = {} for event in event_ids: res[event] = self._event_to_state_group[event] From 6da861ae6937e85689825c06c9198673f5209a2b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 1 Jul 2022 10:52:10 +0100 Subject: [PATCH 055/178] `_process_received_pdu`: Improve exception handling (#13145) `_check_event_auth` is expected to raise `AuthError`s, so no need to log it again. --- changelog.d/13145.misc | 1 + synapse/handlers/federation_event.py | 13 ++++++------- 2 files changed, 7 insertions(+), 7 deletions(-) create mode 100644 changelog.d/13145.misc diff --git a/changelog.d/13145.misc b/changelog.d/13145.misc new file mode 100644 index 0000000000..d5e2dba866 --- /dev/null +++ b/changelog.d/13145.misc @@ -0,0 +1 @@ +Improve exception handling when processing events received over federation. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index b7c54e642f..479d936dc0 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1092,20 +1092,19 @@ class FederationEventHandler: logger.debug("Processing event: %s", event) assert not event.internal_metadata.outlier + context = await self._state_handler.compute_event_context( + event, + state_ids_before_event=state_ids, + ) try: - context = await self._state_handler.compute_event_context( - event, - state_ids_before_event=state_ids, - ) context = await self._check_event_auth( origin, event, context, ) except AuthError as e: - # FIXME richvdh 2021/10/07 I don't think this is reachable. Let's log it - # for now - logger.exception("Unexpected AuthError from _check_event_auth") + # This happens only if we couldn't find the auth events. We'll already have + # logged a warning, so now we just convert to a FederationError. 
raise FederationError("ERROR", e.code, e.msg, affected=event.event_id) if not backfilled and not context.rejected: From d70ff5cc3508f4010ca2d19b090f0338e99c1d28 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Fri, 1 Jul 2022 12:04:56 +0200 Subject: [PATCH 056/178] Extra validation for rest/client/account_data (#13148) * Extra validation for rest/client/account_data This is a fairly simple endpoint and we did pretty well here. * Changelog --- changelog.d/13148.feature | 1 + synapse/rest/client/account_data.py | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13148.feature diff --git a/changelog.d/13148.feature b/changelog.d/13148.feature new file mode 100644 index 0000000000..d1104b04b0 --- /dev/null +++ b/changelog.d/13148.feature @@ -0,0 +1 @@ +Improve validation logic in Synapse's REST endpoints. diff --git a/synapse/rest/client/account_data.py b/synapse/rest/client/account_data.py index bfe985939b..f13970b898 100644 --- a/synapse/rest/client/account_data.py +++ b/synapse/rest/client/account_data.py @@ -15,11 +15,11 @@ import logging from typing import TYPE_CHECKING, Tuple -from synapse.api.errors import AuthError, NotFoundError, SynapseError +from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_json_object_from_request from synapse.http.site import SynapseRequest -from synapse.types import JsonDict +from synapse.types import JsonDict, RoomID from ._base import client_patterns @@ -104,6 +104,13 @@ class RoomAccountDataServlet(RestServlet): if user_id != requester.user.to_string(): raise AuthError(403, "Cannot add account data for other users.") + if not RoomID.is_valid(room_id): + raise SynapseError( + 400, + f"{room_id} is not a valid room ID", + Codes.INVALID_PARAM, + ) + body = parse_json_object_from_request(request) if account_data_type == "m.fully_read": @@ -111,6 +118,7 @@ class RoomAccountDataServlet(RestServlet): 405, "Cannot set m.fully_read through this API." " Use /rooms/!roomId:server.name/read_markers", + Codes.BAD_JSON, ) await self.handler.add_account_data_to_room( @@ -130,6 +138,13 @@ class RoomAccountDataServlet(RestServlet): if user_id != requester.user.to_string(): raise AuthError(403, "Cannot get account data for other users.") + if not RoomID.is_valid(room_id): + raise SynapseError( + 400, + f"{room_id} is not a valid room ID", + Codes.INVALID_PARAM, + ) + event = await self.store.get_account_data_for_room_and_type( user_id, room_id, account_data_type ) From d40b2708cf9cfa2cf7ea7ac6e0273369430a103b Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 1 Jul 2022 11:42:57 +0100 Subject: [PATCH 057/178] 1.62.0rc2 --- CHANGES.md | 10 ++++++++++ changelog.d/13140.bugfix | 1 - changelog.d/13141.bugfix | 1 - debian/changelog | 6 ++++++ pyproject.toml | 2 +- 5 files changed, 17 insertions(+), 3 deletions(-) delete mode 100644 changelog.d/13140.bugfix delete mode 100644 changelog.d/13141.bugfix diff --git a/CHANGES.md b/CHANGES.md index 4c1decf8f4..50b4da5f61 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,13 @@ +Synapse 1.62.0rc2 (2022-07-01) +============================== + +Bugfixes +-------- + +- Fix unread counts for users on large servers. Introduced in v1.62.0rc1. ([\#13140](https://github.com/matrix-org/synapse/issues/13140)) +- Fix DB performance when deleting old push notifications. Introduced in v1.62.0rc1. 
([\#13141](https://github.com/matrix-org/synapse/issues/13141)) + + Synapse 1.62.0rc1 (2022-06-28) ============================== diff --git a/changelog.d/13140.bugfix b/changelog.d/13140.bugfix deleted file mode 100644 index cb0586e39e..0000000000 --- a/changelog.d/13140.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix unread counts for users on large servers. Introduced in v1.62.0rc1. diff --git a/changelog.d/13141.bugfix b/changelog.d/13141.bugfix deleted file mode 100644 index 930e870865..0000000000 --- a/changelog.d/13141.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix DB performance when deleting old push notifications. Introduced in v1.62.0rc1. diff --git a/debian/changelog b/debian/changelog index 7fbd9baef6..295532196b 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.62.0~rc2) stable; urgency=medium + + * New Synapse release 1.62.0rc2. + + -- Synapse Packaging team Fri, 01 Jul 2022 11:42:41 +0100 + matrix-synapse-py3 (1.62.0~rc1) stable; urgency=medium * New Synapse release 1.62.0rc1. diff --git a/pyproject.toml b/pyproject.toml index 8b66d3a9e4..1abbf0f5e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ skip_gitignore = true [tool.poetry] name = "matrix-synapse" -version = "1.62.0rc1" +version = "1.62.0rc2" description = "Homeserver for the Matrix decentralised comms protocol" authors = ["Matrix.org Team and Contributors "] license = "Apache-2.0" From fe910fb10ef854c8c884c6e9a8e7034da5124464 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 1 Jul 2022 13:33:59 +0100 Subject: [PATCH 058/178] complement.sh: Permit skipping docker build (#13143) Add a `-f` argument to `complement.sh` to skip the docker build --- changelog.d/13143.misc | 1 + scripts-dev/complement.sh | 68 ++++++++++++++++++++++++++++++--------- 2 files changed, 53 insertions(+), 16 deletions(-) create mode 100644 changelog.d/13143.misc diff --git a/changelog.d/13143.misc b/changelog.d/13143.misc new file mode 100644 index 0000000000..1cb77c02d7 --- /dev/null +++ b/changelog.d/13143.misc @@ -0,0 +1 @@ +Add support to `complement.sh` for skipping the docker build. diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index f1843717cb..20df5fbc24 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -14,9 +14,12 @@ # By default Synapse is run in monolith mode. This can be overridden by # setting the WORKERS environment variable. # -# A regular expression of test method names can be supplied as the first -# argument to the script. Complement will then only run those tests. If -# no regex is supplied, all tests are run. For example; +# You can optionally give a "-f" argument (for "fast") before any to skip +# rebuilding the docker images, if you just want to rerun the tests. +# +# Remaining commandline arguments are passed through to `go test`. For example, +# you can supply a regular expression of test method names via the "-run" +# argument: # # ./complement.sh -run "TestOutboundFederation(Profile|Send)" # @@ -32,6 +35,37 @@ echo_if_github() { fi } +# Helper to print out the usage instructions +usage() { + cat >&2 <... +Run the complement test suite on Synapse. + + -f Skip rebuilding the docker images, and just use the most recent + 'complement-synapse:latest' image + +For help on arguments to 'go test', run 'go help testflag'. 
+EOF +} + +# parse our arguments +skip_docker_build="" +while [ $# -ge 1 ]; do + arg=$1 + case "$arg" in + "-h") + usage + exit 1 + ;; + "-f") + skip_docker_build=1 + ;; + *) + # unknown arg: presumably an argument to gotest. break the loop. + break + esac + shift +done # enable buildkit for the docker builds export DOCKER_BUILDKIT=1 @@ -49,21 +83,23 @@ if [[ -z "$COMPLEMENT_DIR" ]]; then echo "Checkout available at 'complement-${COMPLEMENT_REF}'" fi -# Build the base Synapse image from the local checkout -echo_if_github "::group::Build Docker image: matrixdotorg/synapse" -docker build -t matrixdotorg/synapse -f "docker/Dockerfile" . -echo_if_github "::endgroup::" +if [ -z "$skip_docker_build" ]; then + # Build the base Synapse image from the local checkout + echo_if_github "::group::Build Docker image: matrixdotorg/synapse" + docker build -t matrixdotorg/synapse -f "docker/Dockerfile" . + echo_if_github "::endgroup::" -# Build the workers docker image (from the base Synapse image we just built). -echo_if_github "::group::Build Docker image: matrixdotorg/synapse-workers" -docker build -t matrixdotorg/synapse-workers -f "docker/Dockerfile-workers" . -echo_if_github "::endgroup::" + # Build the workers docker image (from the base Synapse image we just built). + echo_if_github "::group::Build Docker image: matrixdotorg/synapse-workers" + docker build -t matrixdotorg/synapse-workers -f "docker/Dockerfile-workers" . + echo_if_github "::endgroup::" -# Build the unified Complement image (from the worker Synapse image we just built). -echo_if_github "::group::Build Docker image: complement/Dockerfile" -docker build -t complement-synapse \ - -f "docker/complement/Dockerfile" "docker/complement" -echo_if_github "::endgroup::" + # Build the unified Complement image (from the worker Synapse image we just built). + echo_if_github "::group::Build Docker image: complement/Dockerfile" + docker build -t complement-synapse \ + -f "docker/complement/Dockerfile" "docker/complement" + echo_if_github "::endgroup::" +fi export COMPLEMENT_BASE_IMAGE=complement-synapse From c04e25789ee7fa5bd57864ad7687595f44996798 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Fri, 1 Jul 2022 16:42:49 +0100 Subject: [PATCH 059/178] Enable Complement testing in the 'Twisted Trunk' CI runs. (#13079) --- .github/workflows/twisted_trunk.yml | 67 +++++++++++++++++++++++++++++ changelog.d/13079.misc | 1 + docker/Dockerfile | 9 +++- scripts-dev/complement.sh | 7 ++- 4 files changed, 82 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13079.misc diff --git a/.github/workflows/twisted_trunk.yml b/.github/workflows/twisted_trunk.yml index 5f0671f350..12267405be 100644 --- a/.github/workflows/twisted_trunk.yml +++ b/.github/workflows/twisted_trunk.yml @@ -96,6 +96,72 @@ jobs: /logs/results.tap /logs/**/*.log* + complement: + if: "${{ !failure() && !cancelled() }}" + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + include: + - arrangement: monolith + database: SQLite + + - arrangement: monolith + database: Postgres + + - arrangement: workers + database: Postgres + + steps: + # The path is set via a file given by $GITHUB_PATH. We need both Go 1.17 and GOPATH on the path to run Complement. 
+ # See https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-system-path + - name: "Set Go Version" + run: | + # Add Go 1.17 to the PATH: see https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu2004-Readme.md#environment-variables-2 + echo "$GOROOT_1_17_X64/bin" >> $GITHUB_PATH + # Add the Go path to the PATH: We need this so we can call gotestfmt + echo "~/go/bin" >> $GITHUB_PATH + + - name: "Install Complement Dependencies" + run: | + sudo apt-get update && sudo apt-get install -y libolm3 libolm-dev + go get -v github.com/haveyoudebuggedit/gotestfmt/v2/cmd/gotestfmt@latest + + - name: Run actions/checkout@v2 for synapse + uses: actions/checkout@v2 + with: + path: synapse + + # This step is specific to the 'Twisted trunk' test run: + - name: Patch dependencies + run: | + set -x + DEBIAN_FRONTEND=noninteractive sudo apt-get install -yqq python3 pipx + pipx install poetry==1.1.12 + + poetry remove -n twisted + poetry add -n --extras tls git+https://github.com/twisted/twisted.git#trunk + poetry lock --no-update + # NOT IN 1.1.12 poetry lock --check + working-directory: synapse + + - name: "Install custom gotestfmt template" + run: | + mkdir .gotestfmt/github -p + cp synapse/.ci/complement_package.gotpl .gotestfmt/github/package.gotpl + + # Attempt to check out the same branch of Complement as the PR. If it + # doesn't exist, fallback to HEAD. + - name: Checkout complement + run: synapse/.ci/scripts/checkout_complement.sh + + - run: | + set -o pipefail + TEST_ONLY_SKIP_DEP_HASH_VERIFICATION=1 POSTGRES=${{ (matrix.database == 'Postgres') && 1 || '' }} WORKERS=${{ (matrix.arrangement == 'workers') && 1 || '' }} COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | gotestfmt + shell: bash + name: Run Complement Tests + # open an issue if the build fails, so we know about it. open-issue: if: failure() @@ -103,6 +169,7 @@ jobs: - mypy - trial - sytest + - complement runs-on: ubuntu-latest diff --git a/changelog.d/13079.misc b/changelog.d/13079.misc new file mode 100644 index 0000000000..0133097c83 --- /dev/null +++ b/changelog.d/13079.misc @@ -0,0 +1 @@ +Enable Complement testing in the 'Twisted Trunk' CI runs. \ No newline at end of file diff --git a/docker/Dockerfile b/docker/Dockerfile index c676f83775..22707ed142 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -62,7 +62,13 @@ WORKDIR /synapse # Copy just what we need to run `poetry export`... COPY pyproject.toml poetry.lock /synapse/ -RUN /root/.local/bin/poetry export --extras all -o /synapse/requirements.txt + +# If specified, we won't verify the hashes of dependencies. +# This is only needed if the hashes of dependencies cannot be checked for some +# reason, such as when a git repository is used directly as a dependency. 
+ARG TEST_ONLY_SKIP_DEP_HASH_VERIFICATION + +RUN /root/.local/bin/poetry export --extras all -o /synapse/requirements.txt ${TEST_ONLY_SKIP_DEP_HASH_VERIFICATION:+--without-hashes} ### ### Stage 1: builder @@ -85,6 +91,7 @@ RUN \ openssl \ rustc \ zlib1g-dev \ + git \ && rm -rf /var/lib/apt/lists/* # To speed up rebuilds, install all of the dependencies before we copy over diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 20df5fbc24..8448d49e26 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -23,6 +23,9 @@ # # ./complement.sh -run "TestOutboundFederation(Profile|Send)" # +# Specifying TEST_ONLY_SKIP_DEP_HASH_VERIFICATION=1 will cause `poetry export` +# to not emit any hashes when building the Docker image. This then means that +# you can use 'unverifiable' sources such as git repositories as dependencies. # Exit if a line returns a non-zero exit code set -e @@ -86,7 +89,9 @@ fi if [ -z "$skip_docker_build" ]; then # Build the base Synapse image from the local checkout echo_if_github "::group::Build Docker image: matrixdotorg/synapse" - docker build -t matrixdotorg/synapse -f "docker/Dockerfile" . + docker build -t matrixdotorg/synapse \ + --build-arg TEST_ONLY_SKIP_DEP_HASH_VERIFICATION \ + -f "docker/Dockerfile" . echo_if_github "::endgroup::" # Build the workers docker image (from the base Synapse image we just built). From 8d7491a1520057a3195ea35533a976fa3f3b8e6d Mon Sep 17 00:00:00 2001 From: David Robertson Date: Fri, 1 Jul 2022 19:01:54 +0200 Subject: [PATCH 060/178] matrix-synapse-ldap3: 0.2.0 -> 0.2.1 (#13156) --- changelog.d/13156.bugfix | 1 + poetry.lock | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 changelog.d/13156.bugfix diff --git a/changelog.d/13156.bugfix b/changelog.d/13156.bugfix new file mode 100644 index 0000000000..c5ca487c26 --- /dev/null +++ b/changelog.d/13156.bugfix @@ -0,0 +1 @@ +Update the version of the [ldap3 plugin](https://github.com/matrix-org/matrix-synapse-ldap3/) includled in matrix.org docker images and debian packages to 0.2.1. This fixes [problems involving usernames that have uppercase characters](https://github.com/matrix-org/matrix-synapse-ldap3/pull/163). 
diff --git a/poetry.lock b/poetry.lock index 49fbaab577..f069f692d5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -540,7 +540,7 @@ test = ["tox", "twisted", "aiounittest"] [[package]] name = "matrix-synapse-ldap3" -version = "0.2.0" +version = "0.2.1" description = "An LDAP3 auth provider for Synapse" category = "main" optional = true @@ -552,7 +552,7 @@ service-identity = "*" Twisted = ">=15.1.0" [package.extras] -dev = ["matrix-synapse", "tox", "ldaptor", "mypy (==0.910)", "types-setuptools", "black (==21.9b0)", "flake8 (==4.0.1)", "isort (==5.9.3)"] +dev = ["matrix-synapse", "tox", "ldaptor", "mypy (==0.910)", "types-setuptools", "black (==22.3.0)", "flake8 (==4.0.1)", "isort (==5.9.3)"] [[package]] name = "mccabe" @@ -2046,8 +2046,8 @@ matrix-common = [ {file = "matrix_common-1.2.1.tar.gz", hash = "sha256:a99dcf02a6bd95b24a5a61b354888a2ac92bf2b4b839c727b8dd9da2cdfa3853"}, ] matrix-synapse-ldap3 = [ - {file = "matrix-synapse-ldap3-0.2.0.tar.gz", hash = "sha256:91a0715b43a41ec3033244174fca20846836da98fda711fb01687f7199eecd2e"}, - {file = "matrix_synapse_ldap3-0.2.0-py3-none-any.whl", hash = "sha256:0128ca7c3058987adc2e8a88463bb46879915bfd3d373309632813b353e30f9f"}, + {file = "matrix-synapse-ldap3-0.2.1.tar.gz", hash = "sha256:bfb4390f4a262ffb0d6f057ff3aeb1e46d4e52ff420a064d795fb4f555f00285"}, + {file = "matrix_synapse_ldap3-0.2.1-py3-none-any.whl", hash = "sha256:1b3310a60f1d06466f35905a269b6df95747fd1305f2b7fe638f373963b2aa2c"}, ] mccabe = [ {file = "mccabe-0.6.1-py2.py3-none-any.whl", hash = "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42"}, From fa10468eb4eebb5e648aa2d4ca5c87c0cd1aed88 Mon Sep 17 00:00:00 2001 From: Till <2353100+S7evinK@users.noreply.github.com> Date: Mon, 4 Jul 2022 14:34:50 +0200 Subject: [PATCH 061/178] [Complement] Allow device_name lookup over federation (#13167) --- changelog.d/13167.misc | 1 + docker/complement/conf/workers-shared-extra.yaml.j2 | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 changelog.d/13167.misc diff --git a/changelog.d/13167.misc b/changelog.d/13167.misc new file mode 100644 index 0000000000..a7c7a688de --- /dev/null +++ b/changelog.d/13167.misc @@ -0,0 +1 @@ +Update config used by Complement to allow device name lookup over federation. \ No newline at end of file diff --git a/docker/complement/conf/workers-shared-extra.yaml.j2 b/docker/complement/conf/workers-shared-extra.yaml.j2 index 7c6a0fd756..b5f675bc73 100644 --- a/docker/complement/conf/workers-shared-extra.yaml.j2 +++ b/docker/complement/conf/workers-shared-extra.yaml.j2 @@ -81,6 +81,8 @@ rc_invites: federation_rr_transactions_per_room_per_second: 9999 +allow_device_name_lookup_over_federation: true + ## Experimental Features ## experimental_features: From 9820665597ab6a3bbb1d23d0824752967b2170dd Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 4 Jul 2022 15:15:33 +0100 Subject: [PATCH 062/178] Remove tests/utils.py from mypy's exclude list (#13159) --- changelog.d/13159.misc | 1 + mypy.ini | 1 - tests/server.py | 1 - tests/utils.py | 4 ++-- 4 files changed, 3 insertions(+), 4 deletions(-) create mode 100644 changelog.d/13159.misc diff --git a/changelog.d/13159.misc b/changelog.d/13159.misc new file mode 100644 index 0000000000..bb5554ebe0 --- /dev/null +++ b/changelog.d/13159.misc @@ -0,0 +1 @@ +Improve and fix type hints. 
\ No newline at end of file diff --git a/mypy.ini b/mypy.ini index b9b16860db..d757a88fd1 100644 --- a/mypy.ini +++ b/mypy.ini @@ -73,7 +73,6 @@ exclude = (?x) |tests/util/test_lrucache.py |tests/util/test_rwlock.py |tests/util/test_wheel_timer.py - |tests/utils.py )$ [mypy-synapse.federation.transport.client] diff --git a/tests/server.py b/tests/server.py index b9f465971f..ce017ca0f6 100644 --- a/tests/server.py +++ b/tests/server.py @@ -830,7 +830,6 @@ def setup_test_homeserver( # Mock TLS hs.tls_server_context_factory = Mock() - hs.tls_client_options_factory = Mock() hs.setup() if homeserver_to_use == TestHomeServer: diff --git a/tests/utils.py b/tests/utils.py index cabb2c0dec..aca6a0083b 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -64,7 +64,7 @@ def setupdb(): password=POSTGRES_PASSWORD, dbname=POSTGRES_DBNAME_FOR_INITIAL_CREATE, ) - db_conn.autocommit = True + db_engine.attempt_to_set_autocommit(db_conn, autocommit=True) cur = db_conn.cursor() cur.execute("DROP DATABASE IF EXISTS %s;" % (POSTGRES_BASE_DB,)) cur.execute( @@ -94,7 +94,7 @@ def setupdb(): password=POSTGRES_PASSWORD, dbname=POSTGRES_DBNAME_FOR_INITIAL_CREATE, ) - db_conn.autocommit = True + db_engine.attempt_to_set_autocommit(db_conn, autocommit=True) cur = db_conn.cursor() cur.execute("DROP DATABASE IF EXISTS %s;" % (POSTGRES_BASE_DB,)) cur.close() From 723ce73d0253adddfb0264ff50ca4ebce0b70130 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 4 Jul 2022 16:02:21 +0100 Subject: [PATCH 063/178] Fix stuck notification counts on small servers (#13168) --- changelog.d/13168.bugfix | 1 + synapse/storage/databases/main/event_push_actions.py | 9 +++++++-- tests/storage/test_event_push_actions.py | 10 +++++----- 3 files changed, 13 insertions(+), 7 deletions(-) create mode 100644 changelog.d/13168.bugfix diff --git a/changelog.d/13168.bugfix b/changelog.d/13168.bugfix new file mode 100644 index 0000000000..f462260c59 --- /dev/null +++ b/changelog.d/13168.bugfix @@ -0,0 +1 @@ +Fix unread counts for users on small servers. Introduced in v1.62.0rc1. diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 7d4754b3d3..505616e210 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -972,7 +972,12 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas stream_row = txn.fetchone() if stream_row: (offset_stream_ordering,) = stream_row - rotate_to_stream_ordering = offset_stream_ordering + + # We need to bound by the current token to ensure that we handle + # out-of-order writes correctly. + rotate_to_stream_ordering = min( + offset_stream_ordering, self._stream_id_gen.get_current_token() + ) caught_up = False else: rotate_to_stream_ordering = self._stream_id_gen.get_current_token() @@ -1004,7 +1009,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas SELECT user_id, room_id, count(*) as cnt, max(stream_ordering) as stream_ordering FROM event_push_actions - WHERE ? <= stream_ordering AND stream_ordering < ? + WHERE ? < stream_ordering AND stream_ordering <= ? 
AND %s = 1 GROUP BY user_id, room_id ) AS upd diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index 684485ae06..852b663387 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -146,12 +146,12 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): _assert_counts(0, 0) _inject_actions(1, PlAIN_NOTIF) _assert_counts(1, 0) - _rotate(2) + _rotate(1) _assert_counts(1, 0) _inject_actions(3, PlAIN_NOTIF) _assert_counts(2, 0) - _rotate(4) + _rotate(3) _assert_counts(2, 0) _inject_actions(5, PlAIN_NOTIF) @@ -162,7 +162,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): _assert_counts(0, 0) _inject_actions(6, PlAIN_NOTIF) - _rotate(7) + _rotate(6) _assert_counts(1, 0) self.get_success( @@ -178,13 +178,13 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): _inject_actions(8, HIGHLIGHT) _assert_counts(1, 1) - _rotate(9) + _rotate(8) _assert_counts(1, 1) # Check that adding another notification and rotating after highlight # works. _inject_actions(10, PlAIN_NOTIF) - _rotate(11) + _rotate(10) _assert_counts(2, 1) # Check that sending read receipts at different points results in the From 046d87756bc157af83e4f75b514490464a89d3d0 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 4 Jul 2022 16:16:47 +0100 Subject: [PATCH 064/178] 1.62.0rc3 --- CHANGES.md | 10 ++++++++++ changelog.d/13156.bugfix | 1 - changelog.d/13168.bugfix | 1 - debian/changelog | 6 ++++++ pyproject.toml | 2 +- 5 files changed, 17 insertions(+), 3 deletions(-) delete mode 100644 changelog.d/13156.bugfix delete mode 100644 changelog.d/13168.bugfix diff --git a/CHANGES.md b/CHANGES.md index 50b4da5f61..1fb1ff9abc 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,13 @@ +Synapse 1.62.0rc3 (2022-07-04) +============================== + +Bugfixes +-------- + +- Update the version of the [ldap3 plugin](https://github.com/matrix-org/matrix-synapse-ldap3/) includled in matrix.org docker images and debian packages to 0.2.1. This fixes [problems involving usernames that have uppercase characters](https://github.com/matrix-org/matrix-synapse-ldap3/pull/163). ([\#13156](https://github.com/matrix-org/synapse/issues/13156)) +- Fix unread counts for users on small servers. Introduced in v1.62.0rc1. ([\#13168](https://github.com/matrix-org/synapse/issues/13168)) + + Synapse 1.62.0rc2 (2022-07-01) ============================== diff --git a/changelog.d/13156.bugfix b/changelog.d/13156.bugfix deleted file mode 100644 index c5ca487c26..0000000000 --- a/changelog.d/13156.bugfix +++ /dev/null @@ -1 +0,0 @@ -Update the version of the [ldap3 plugin](https://github.com/matrix-org/matrix-synapse-ldap3/) includled in matrix.org docker images and debian packages to 0.2.1. This fixes [problems involving usernames that have uppercase characters](https://github.com/matrix-org/matrix-synapse-ldap3/pull/163). diff --git a/changelog.d/13168.bugfix b/changelog.d/13168.bugfix deleted file mode 100644 index f462260c59..0000000000 --- a/changelog.d/13168.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix unread counts for users on small servers. Introduced in v1.62.0rc1. diff --git a/debian/changelog b/debian/changelog index 295532196b..c3a727800c 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.62.0~rc3) stable; urgency=medium + + * New Synapse release 1.62.0rc3. + + -- Synapse Packaging team Mon, 04 Jul 2022 16:07:01 +0100 + matrix-synapse-py3 (1.62.0~rc2) stable; urgency=medium * New Synapse release 1.62.0rc2. 
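The boundary change in the `event_push_actions` rotation query above (#13168) can be restated as a toy check. This is not Synapse code; the concrete stream orderings and the starting position of 0 are illustrative assumptions drawn from the updated tests:

```python
def in_rotation_window(stream_ordering: int, last_rotated: int, rotate_to: int) -> bool:
    # New query shape: "? < stream_ordering AND stream_ordering <= ?", i.e.
    # exclusive of what the previous rotation already summarised and
    # inclusive of the position we are rotating up to.
    return last_rotated < stream_ordering <= rotate_to


# An action at stream_ordering 1 is now captured by rotating to 1 itself
# (hence the updated tests call _rotate(1) where they used to call _rotate(2))...
assert in_rotation_window(1, last_rotated=0, rotate_to=1)
# ...and a later rotation to 3 does not count the same action twice.
assert not in_rotation_window(1, last_rotated=1, rotate_to=3)
```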
diff --git a/pyproject.toml b/pyproject.toml index 1abbf0f5e5..b9f2ea432c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ skip_gitignore = true [tool.poetry] name = "matrix-synapse" -version = "1.62.0rc2" +version = "1.62.0rc3" description = "Homeserver for the Matrix decentralised comms protocol" authors = ["Matrix.org Team and Contributors "] license = "Apache-2.0" From 95a260da7342ef654a6da4985e0539225969ddde Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 4 Jul 2022 16:29:04 +0100 Subject: [PATCH 065/178] Update changelog for v1.62.0rc2 --- CHANGES.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 1fb1ff9abc..babfe1628f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,8 +4,8 @@ Synapse 1.62.0rc3 (2022-07-04) Bugfixes -------- -- Update the version of the [ldap3 plugin](https://github.com/matrix-org/matrix-synapse-ldap3/) includled in matrix.org docker images and debian packages to 0.2.1. This fixes [problems involving usernames that have uppercase characters](https://github.com/matrix-org/matrix-synapse-ldap3/pull/163). ([\#13156](https://github.com/matrix-org/synapse/issues/13156)) -- Fix unread counts for users on small servers. Introduced in v1.62.0rc1. ([\#13168](https://github.com/matrix-org/synapse/issues/13168)) +- Update the version of the [ldap3 plugin](https://github.com/matrix-org/matrix-synapse-ldap3/) included in the `matrixdotorg/synapse` DockerHub images and the Debian packages hosted on `packages.matrix.org` to 0.2.1. This fixes [a bug](https://github.com/matrix-org/matrix-synapse-ldap3/pull/163) with usernames containing uppercase characters. ([\#13156](https://github.com/matrix-org/synapse/issues/13156)) +- Fix a bug introduced in Synapse 1.62.0rc1 affecting unread counts for users on small servers. ([\#13168](https://github.com/matrix-org/synapse/issues/13168)) Synapse 1.62.0rc2 (2022-07-01) From dcc4e0621cc101271efc573600bd7591a12cea7c Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 4 Jul 2022 17:47:44 +0100 Subject: [PATCH 066/178] Up the dependency on canonicaljson to ^1.5.0 --- pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b9f2ea432c..c098b8df03 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,7 +110,9 @@ jsonschema = ">=3.0.0" frozendict = ">=1,!=2.1.2" # We require 2.1.0 or higher for type hints. Previous guard was >= 1.1.0 unpaddedbase64 = ">=2.1.0" -canonicaljson = "^1.4.0" +# We require 1.5.0 to work around an issue when running against the C implementation of +# frozendict: https://github.com/matrix-org/python-canonicaljson/issues/36 +canonicaljson = "^1.5.0" # we use the type definitions added in signedjson 1.1. signedjson = "^1.1.0" # validating SSL certs for IP addresses requires service_identity 18.1. From 5b5c943e7d978475c30b52941b678eac36008dc9 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 4 Jul 2022 17:48:09 +0100 Subject: [PATCH 067/178] Revert "Up the dependency on canonicaljson to ^1.5.0" This reverts commit dcc4e0621cc101271efc573600bd7591a12cea7c. --- pyproject.toml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c098b8df03..b9f2ea432c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,9 +110,7 @@ jsonschema = ">=3.0.0" frozendict = ">=1,!=2.1.2" # We require 2.1.0 or higher for type hints. 
Previous guard was >= 1.1.0 unpaddedbase64 = ">=2.1.0" -# We require 1.5.0 to work around an issue when running against the C implementation of -# frozendict: https://github.com/matrix-org/python-canonicaljson/issues/36 -canonicaljson = "^1.5.0" +canonicaljson = "^1.4.0" # we use the type definitions added in signedjson 1.1. signedjson = "^1.1.0" # validating SSL certs for IP addresses requires service_identity 18.1. From d102ad67fddc650c34baa89dc7b2926d46a9aeca Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 4 Jul 2022 18:08:56 +0100 Subject: [PATCH 068/178] annotate tests.server.FakeChannel (#13136) --- changelog.d/13136.misc | 1 + tests/rest/admin/test_room.py | 4 +-- tests/rest/admin/test_user.py | 2 +- tests/rest/client/test_account.py | 5 ++-- tests/rest/client/test_profile.py | 10 +++++--- tests/rest/client/test_relations.py | 2 +- tests/server.py | 38 ++++++++++++++++------------- 7 files changed, 36 insertions(+), 26 deletions(-) create mode 100644 changelog.d/13136.misc diff --git a/changelog.d/13136.misc b/changelog.d/13136.misc new file mode 100644 index 0000000000..6cf451d8cf --- /dev/null +++ b/changelog.d/13136.misc @@ -0,0 +1 @@ +Add type annotations to `tests.server`. diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index ca6af9417b..230dc76f72 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -1579,8 +1579,8 @@ class RoomTestCase(unittest.HomeserverTestCase): access_token=self.admin_user_tok, ) self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) - self.assertEqual(room_id, channel.json_body.get("rooms")[0].get("room_id")) - self.assertEqual("ж", channel.json_body.get("rooms")[0].get("name")) + self.assertEqual(room_id, channel.json_body["rooms"][0].get("room_id")) + self.assertEqual("ж", channel.json_body["rooms"][0].get("name")) def test_single_room(self) -> None: """Test that a single room can be requested correctly""" diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index 0d44102237..e32aaadb98 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -1488,7 +1488,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): if channel.code != HTTPStatus.OK: raise HttpResponseException( - channel.code, channel.result["reason"], channel.json_body + channel.code, channel.result["reason"], channel.result["body"] ) # Set monthly active users to the limit diff --git a/tests/rest/client/test_account.py b/tests/rest/client/test_account.py index a43a137273..1f9b65351e 100644 --- a/tests/rest/client/test_account.py +++ b/tests/rest/client/test_account.py @@ -949,7 +949,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): client_secret: str, next_link: Optional[str] = None, expect_code: int = 200, - ) -> str: + ) -> Optional[str]: """Request a validation token to add an email address to a user's account Args: @@ -959,7 +959,8 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): expect_code: Expected return code of the call Returns: - The ID of the new threepid validation session + The ID of the new threepid validation session, or None if the response + did not contain a session ID. 
""" body = {"client_secret": client_secret, "email": email, "send_attempt": 1} if next_link: diff --git a/tests/rest/client/test_profile.py b/tests/rest/client/test_profile.py index 29bed0e872..8de5a342ae 100644 --- a/tests/rest/client/test_profile.py +++ b/tests/rest/client/test_profile.py @@ -153,18 +153,22 @@ class ProfileTestCase(unittest.HomeserverTestCase): ) self.assertEqual(channel.code, 400, channel.result) - def _get_displayname(self, name: Optional[str] = None) -> str: + def _get_displayname(self, name: Optional[str] = None) -> Optional[str]: channel = self.make_request( "GET", "/profile/%s/displayname" % (name or self.owner,) ) self.assertEqual(channel.code, 200, channel.result) - return channel.json_body["displayname"] + # FIXME: If a user has no displayname set, Synapse returns 200 and omits a + # displayname from the response. This contradicts the spec, see #13137. + return channel.json_body.get("displayname") - def _get_avatar_url(self, name: Optional[str] = None) -> str: + def _get_avatar_url(self, name: Optional[str] = None) -> Optional[str]: channel = self.make_request( "GET", "/profile/%s/avatar_url" % (name or self.owner,) ) self.assertEqual(channel.code, 200, channel.result) + # FIXME: If a user has no avatar set, Synapse returns 200 and omits an + # avatar_url from the response. This contradicts the spec, see #13137. return channel.json_body.get("avatar_url") @unittest.override_config({"max_avatar_size": 50}) diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py index aa84906548..ad03eee17b 100644 --- a/tests/rest/client/test_relations.py +++ b/tests/rest/client/test_relations.py @@ -800,7 +800,7 @@ class RelationPaginationTestCase(BaseRelationsTestCase): ) expected_event_ids.append(channel.json_body["event_id"]) - prev_token = "" + prev_token: Optional[str] = "" found_event_ids: List[str] = [] for _ in range(20): from_token = "" diff --git a/tests/server.py b/tests/server.py index ce017ca0f6..df3f1564c9 100644 --- a/tests/server.py +++ b/tests/server.py @@ -43,6 +43,7 @@ from twisted.internet.defer import Deferred, fail, maybeDeferred, succeed from twisted.internet.error import DNSLookupError from twisted.internet.interfaces import ( IAddress, + IConsumer, IHostnameResolver, IProtocol, IPullProducer, @@ -53,11 +54,7 @@ from twisted.internet.interfaces import ( ITransport, ) from twisted.python.failure import Failure -from twisted.test.proto_helpers import ( - AccumulatingProtocol, - MemoryReactor, - MemoryReactorClock, -) +from twisted.test.proto_helpers import AccumulatingProtocol, MemoryReactorClock from twisted.web.http_headers import Headers from twisted.web.resource import IResource from twisted.web.server import Request, Site @@ -96,6 +93,7 @@ class TimedOutException(Exception): """ +@implementer(IConsumer) @attr.s(auto_attribs=True) class FakeChannel: """ @@ -104,7 +102,7 @@ class FakeChannel: """ site: Union[Site, "FakeSite"] - _reactor: MemoryReactor + _reactor: MemoryReactorClock result: dict = attr.Factory(dict) _ip: str = "127.0.0.1" _producer: Optional[Union[IPullProducer, IPushProducer]] = None @@ -122,7 +120,7 @@ class FakeChannel: self._request = request @property - def json_body(self): + def json_body(self) -> JsonDict: return json.loads(self.text_body) @property @@ -140,7 +138,7 @@ class FakeChannel: return self.result.get("done", False) @property - def code(self): + def code(self) -> int: if not self.result: raise Exception("No result yet.") return int(self.result["code"]) @@ -160,7 +158,7 @@ class FakeChannel: 
self.result["reason"] = reason self.result["headers"] = headers - def write(self, content): + def write(self, content: bytes) -> None: assert isinstance(content, bytes), "Should be bytes! " + repr(content) if "body" not in self.result: @@ -168,11 +166,16 @@ class FakeChannel: self.result["body"] += content - def registerProducer(self, producer, streaming): + # Type ignore: mypy doesn't like the fact that producer isn't an IProducer. + def registerProducer( # type: ignore[override] + self, + producer: Union[IPullProducer, IPushProducer], + streaming: bool, + ) -> None: self._producer = producer self.producerStreaming = streaming - def _produce(): + def _produce() -> None: if self._producer: self._producer.resumeProducing() self._reactor.callLater(0.1, _produce) @@ -180,31 +183,32 @@ class FakeChannel: if not streaming: self._reactor.callLater(0.0, _produce) - def unregisterProducer(self): + def unregisterProducer(self) -> None: if self._producer is None: return self._producer = None - def requestDone(self, _self): + def requestDone(self, _self: Request) -> None: self.result["done"] = True if isinstance(_self, SynapseRequest): + assert _self.logcontext is not None self.resource_usage = _self.logcontext.get_resource_usage() - def getPeer(self): + def getPeer(self) -> IAddress: # We give an address so that getClientAddress/getClientIP returns a non null entry, # causing us to record the MAU return address.IPv4Address("TCP", self._ip, 3423) - def getHost(self): + def getHost(self) -> IAddress: # this is called by Request.__init__ to configure Request.host. return address.IPv4Address("TCP", "127.0.0.1", 8888) - def isSecure(self): + def isSecure(self) -> bool: return False @property - def transport(self): + def transport(self) -> "FakeChannel": return self def await_result(self, timeout_ms: int = 1000) -> None: From e514495465a52531da6c833e4c926f3d1625ae5e Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Tue, 5 Jul 2022 11:10:26 +0200 Subject: [PATCH 069/178] Add missing links to config options (#13166) --- changelog.d/13166.doc | 1 + docs/usage/configuration/config_documentation.md | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 changelog.d/13166.doc diff --git a/changelog.d/13166.doc b/changelog.d/13166.doc new file mode 100644 index 0000000000..2d92e341ed --- /dev/null +++ b/changelog.d/13166.doc @@ -0,0 +1 @@ +Add missing links to config options. diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 82edd53e36..ef411c5356 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -591,7 +591,7 @@ Example configuration: dummy_events_threshold: 5 ``` --- -Config option `delete_stale_devices_after` +### `delete_stale_devices_after` An optional duration. If set, Synapse will run a daily background task to log out and delete any device that hasn't been accessed for more than the specified amount of time. @@ -1843,7 +1843,7 @@ Example configuration: turn_shared_secret: "YOUR_SHARED_SECRET" ``` ---- -Config options: `turn_username` and `turn_password` +### `turn_username` and `turn_password` The Username and password if the TURN server needs them and does not use a token. 
@@ -3373,7 +3373,7 @@ alias_creation_rules: action: deny ``` --- -Config options: `room_list_publication_rules` +### `room_list_publication_rules` The `room_list_publication_rules` option controls who can publish and which rooms can be published in the public room list. From 65e675504fe060e5e99e145be450fe4d492f404f Mon Sep 17 00:00:00 2001 From: reivilibre Date: Tue, 5 Jul 2022 10:46:20 +0100 Subject: [PATCH 070/178] Add the ability to set the log level using the `SYNAPSE_TEST_LOG_LEVEL` environment when using `complement.sh`. (#13152) --- changelog.d/13152.misc | 1 + docker/README.md | 7 +++++++ docker/conf/log.config | 6 ++++++ docker/configure_workers_and_start.py | 20 ++++++++++++++------ docs/development/contributing_guide.md | 4 ++++ scripts-dev/complement.sh | 12 ++++++++++++ 6 files changed, 44 insertions(+), 6 deletions(-) create mode 100644 changelog.d/13152.misc diff --git a/changelog.d/13152.misc b/changelog.d/13152.misc new file mode 100644 index 0000000000..0c919ab700 --- /dev/null +++ b/changelog.d/13152.misc @@ -0,0 +1 @@ +Add the ability to set the log level using the `SYNAPSE_TEST_LOG_LEVEL` environment when using `complement.sh`. \ No newline at end of file diff --git a/docker/README.md b/docker/README.md index 67c3bc65f0..5b7de2fe38 100644 --- a/docker/README.md +++ b/docker/README.md @@ -67,6 +67,13 @@ The following environment variables are supported in `generate` mode: * `UID`, `GID`: the user id and group id to use for creating the data directories. If unset, and no user is set via `docker run --user`, defaults to `991`, `991`. +* `SYNAPSE_LOG_LEVEL`: the log level to use (one of `DEBUG`, `INFO`, `WARNING` or `ERROR`). + Defaults to `INFO`. +* `SYNAPSE_LOG_SENSITIVE`: if set and the log level is set to `DEBUG`, Synapse + will log sensitive information such as access tokens. + This should not be needed unless you are a developer attempting to debug something + particularly tricky. + ## Postgres diff --git a/docker/conf/log.config b/docker/conf/log.config index d9e85aa533..90b5179838 100644 --- a/docker/conf/log.config +++ b/docker/conf/log.config @@ -49,11 +49,17 @@ handlers: class: logging.StreamHandler formatter: precise +{% if not SYNAPSE_LOG_SENSITIVE %} +{# + If SYNAPSE_LOG_SENSITIVE is unset, then override synapse.storage.SQL to INFO + so that DEBUG entries (containing sensitive information) are not emitted. +#} loggers: synapse.storage.SQL: # beware: increasing this to DEBUG will make synapse log sensitive # information such as access tokens. level: INFO +{% endif %} root: level: {{ SYNAPSE_LOG_LEVEL or "INFO" }} diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index 4521f99eb4..51583dc13d 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -29,6 +29,10 @@ # * SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER: Whether to use the forking launcher, # only intended for usage in Complement at the moment. # No stability guarantees are provided. +# * SYNAPSE_LOG_LEVEL: Set this to DEBUG, INFO, WARNING or ERROR to change the +# log level. INFO is the default. +# * SYNAPSE_LOG_SENSITIVE: If unset, SQL and SQL values won't be logged, +# regardless of the SYNAPSE_LOG_LEVEL setting. 
# # NOTE: According to Complement's ENTRYPOINT expectations for a homeserver image (as defined # in the project's README), this script may be run multiple times, and functionality should @@ -38,7 +42,7 @@ import os import subprocess import sys from pathlib import Path -from typing import Any, Dict, List, Mapping, MutableMapping, NoReturn, Set +from typing import Any, Dict, List, Mapping, MutableMapping, NoReturn, Optional, Set import yaml from jinja2 import Environment, FileSystemLoader @@ -552,13 +556,17 @@ def generate_worker_log_config( Returns: the path to the generated file """ # Check whether we should write worker logs to disk, in addition to the console - extra_log_template_args = {} + extra_log_template_args: Dict[str, Optional[str]] = {} if environ.get("SYNAPSE_WORKERS_WRITE_LOGS_TO_DISK"): - extra_log_template_args["LOG_FILE_PATH"] = "{dir}/logs/{name}.log".format( - dir=data_dir, name=worker_name - ) + extra_log_template_args["LOG_FILE_PATH"] = f"{data_dir}/logs/{worker_name}.log" + + extra_log_template_args["SYNAPSE_LOG_LEVEL"] = environ.get("SYNAPSE_LOG_LEVEL") + extra_log_template_args["SYNAPSE_LOG_SENSITIVE"] = environ.get( + "SYNAPSE_LOG_SENSITIVE" + ) + # Render and write the file - log_config_filepath = "/conf/workers/{name}.log.config".format(name=worker_name) + log_config_filepath = f"/conf/workers/{worker_name}.log.config" convert( "/conf/log.config", log_config_filepath, diff --git a/docs/development/contributing_guide.md b/docs/development/contributing_guide.md index 4738f8a6b6..900369b80f 100644 --- a/docs/development/contributing_guide.md +++ b/docs/development/contributing_guide.md @@ -309,6 +309,10 @@ The above will run a monolithic (single-process) Synapse with SQLite as the data - Passing `POSTGRES=1` as an environment variable to use the Postgres database instead. - Passing `WORKERS=1` as an environment variable to use a workerised setup instead. This option implies the use of Postgres. +To increase the log level for the tests, set `SYNAPSE_TEST_LOG_LEVEL`, e.g: +```sh +SYNAPSE_TEST_LOG_LEVEL=DEBUG COMPLEMENT_DIR=../complement ./scripts-dev/complement.sh -run TestImportHistoricalMessages +``` ### Prettier formatting with `gotestfmt` diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 8448d49e26..705243ca9b 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -145,6 +145,18 @@ else test_tags="$test_tags,faster_joins" fi + +if [[ -n "$SYNAPSE_TEST_LOG_LEVEL" ]]; then + # Set the log level to what is desired + export PASS_SYNAPSE_LOG_LEVEL="$SYNAPSE_TEST_LOG_LEVEL" + + # Allow logging sensitive things (currently SQL queries & parameters). + # (This won't have any effect if we're not logging at DEBUG level overall.) + # Since this is just a test suite, this is fine and won't reveal anyone's + # personal information + export PASS_SYNAPSE_LOG_SENSITIVE=1 +fi + # Run the tests! echo "Images built; running complement" cd "$COMPLEMENT_DIR" From cf63d57dcee264b43afb424a514da1dc7cf03a91 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 5 Jul 2022 11:14:27 +0100 Subject: [PATCH 071/178] 1.62.0 --- CHANGES.md | 6 ++++++ debian/changelog | 6 ++++++ pyproject.toml | 2 +- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index babfe1628f..2db96249e0 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,9 @@ +Synapse 1.62.0 (2022-07-05) +=========================== + +No significant changes since 1.62.0rc3. 
+ + Synapse 1.62.0rc3 (2022-07-04) ============================== diff --git a/debian/changelog b/debian/changelog index c3a727800c..520d8d20ae 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.62.0) stable; urgency=medium + + * New Synapse release 1.62.0. + + -- Synapse Packaging team Tue, 05 Jul 2022 11:14:15 +0100 + matrix-synapse-py3 (1.62.0~rc3) stable; urgency=medium * New Synapse release 1.62.0rc3. diff --git a/pyproject.toml b/pyproject.toml index b9f2ea432c..4d1007fcb3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ skip_gitignore = true [tool.poetry] name = "matrix-synapse" -version = "1.62.0rc3" +version = "1.62.0" description = "Homeserver for the Matrix decentralised comms protocol" authors = ["Matrix.org Team and Contributors "] license = "Apache-2.0" From b51a0f4be0287f88a747952fb3cc8132d29df4c8 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 5 Jul 2022 11:19:54 +0100 Subject: [PATCH 072/178] Mention the spamchecker plugins --- CHANGES.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 2db96249e0..ec27cda1b2 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,8 @@ Synapse 1.62.0 (2022-07-05) No significant changes since 1.62.0rc3. +Authors of spam-checker plugins should consult the [upgrade notes](https://github.com/matrix-org/synapse/blob/release-v1.62/docs/upgrade.md#upgrading-to-v1620) to learn about the enriched signatures for spam checker callbacks, which are supported with this release of Synapse. + Synapse 1.62.0rc3 (2022-07-04) ============================== From 2c2a42cc107fb02bbf7c8d4e6141cbe601221629 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 5 Jul 2022 05:56:06 -0500 Subject: [PATCH 073/178] Fix application service not being able to join remote federated room without a profile set (#13131) Fix https://github.com/matrix-org/synapse/issues/4778 Complement tests: https://github.com/matrix-org/complement/pull/399 --- changelog.d/13131.bugfix | 1 + synapse/handlers/room_member.py | 32 +++++++++++++++++++++++--------- 2 files changed, 24 insertions(+), 9 deletions(-) create mode 100644 changelog.d/13131.bugfix diff --git a/changelog.d/13131.bugfix b/changelog.d/13131.bugfix new file mode 100644 index 0000000000..06602f03fe --- /dev/null +++ b/changelog.d/13131.bugfix @@ -0,0 +1 @@ +Fix application service not being able to join remote federated room without a profile set. 
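The `room_member.py` diff that follows turns the profile lookup during remote joins and knocks into a best-effort step. Below is a condensed sketch of that pattern, assuming only the `get_displayname`/`get_avatar_url` getters used in the diff; it is not the full Synapse handler:

```python
import logging
from typing import Any, Dict

logger = logging.getLogger(__name__)


async def remote_join_content(
    profile: Any, target: str, content_specified: bool
) -> Dict[str, Any]:
    content: Dict[str, Any] = {"membership": "join"}
    if not content_specified:
        try:
            content["displayname"] = await profile.get_displayname(target)
            content["avatar_url"] = await profile.get_avatar_url(target)
        except Exception as e:
            # e.g. an application service user that has never set a profile:
            # log and continue rather than failing the whole join.
            logger.info(
                "Failed to get profile information while processing remote join for %r: %s",
                target,
                e,
            )
    return content
```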
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 5648ab4bf4..a1d8875dd8 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -846,10 +846,17 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): content["membership"] = Membership.JOIN - profile = self.profile_handler - if not content_specified: - content["displayname"] = await profile.get_displayname(target) - content["avatar_url"] = await profile.get_avatar_url(target) + try: + profile = self.profile_handler + if not content_specified: + content["displayname"] = await profile.get_displayname(target) + content["avatar_url"] = await profile.get_avatar_url(target) + except Exception as e: + logger.info( + "Failed to get profile information while processing remote join for %r: %s", + target, + e, + ) if requester.is_guest: content["kind"] = "guest" @@ -926,11 +933,18 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): content["membership"] = Membership.KNOCK - profile = self.profile_handler - if "displayname" not in content: - content["displayname"] = await profile.get_displayname(target) - if "avatar_url" not in content: - content["avatar_url"] = await profile.get_avatar_url(target) + try: + profile = self.profile_handler + if "displayname" not in content: + content["displayname"] = await profile.get_displayname(target) + if "avatar_url" not in content: + content["avatar_url"] = await profile.get_avatar_url(target) + except Exception as e: + logger.info( + "Failed to get profile information while processing remote knock for %r: %s", + target, + e, + ) return await self.remote_knock( remote_room_hosts, room_id, target, content From 578a5e24a905c5d90d5c609cb485a5ab7277f8a5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 5 Jul 2022 13:51:04 +0100 Subject: [PATCH 074/178] Use upserts for updating `event_push_summary` (#13153) --- changelog.d/13153.misc | 1 + .../databases/main/event_push_actions.py | 47 +++---------------- 2 files changed, 8 insertions(+), 40 deletions(-) create mode 100644 changelog.d/13153.misc diff --git a/changelog.d/13153.misc b/changelog.d/13153.misc new file mode 100644 index 0000000000..3bb51962e7 --- /dev/null +++ b/changelog.d/13153.misc @@ -0,0 +1 @@ +Reduce DB usage of `/sync` when a large number of unread messages have recently been sent in a room. 
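The diff below replaces a two-step "insert new rows, then update existing ones" dance with a single upsert per `(user_id, room_id)` pair, which is why the `old_user_id` bookkeeping disappears. As a rough, engine-agnostic sketch of what each row's upsert amounts to (SQLite-style placeholders, illustrative helper name, and assuming a unique index on `(user_id, room_id)`; this is not Synapse's actual `simple_upsert_many_txn`, which is more general):

```python
# Illustrative only: the database decides per row whether it is an insert or an
# update, so the caller no longer needs to know which rows already exist.
UPSERT_SQL = """
    INSERT INTO event_push_summary
        (user_id, room_id, notif_count, unread_count, stream_ordering)
    VALUES (?, ?, ?, ?, ?)
    ON CONFLICT (user_id, room_id)
    DO UPDATE SET
        notif_count = EXCLUDED.notif_count,
        unread_count = EXCLUDED.unread_count,
        stream_ordering = EXCLUDED.stream_ordering
"""


def rotate_summaries(txn, summaries) -> None:
    # `summaries` maps (user_id, room_id) to an object with notif_count,
    # unread_count and stream_ordering attributes, as in the diff below.
    txn.executemany(
        UPSERT_SQL,
        [
            (user_id, room_id, s.notif_count, s.unread_count, s.stream_ordering)
            for (user_id, room_id), s in summaries.items()
        ],
    )
```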
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index bb6e104d71..32536430aa 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -1013,8 +1013,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas sql = """ SELECT user_id, room_id, coalesce(old.%s, 0) + upd.cnt, - upd.stream_ordering, - old.user_id + upd.stream_ordering FROM ( SELECT user_id, room_id, count(*) as cnt, max(stream_ordering) as stream_ordering @@ -1042,7 +1041,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas summaries[(row[0], row[1])] = _EventPushSummary( unread_count=row[2], stream_ordering=row[3], - old_user_id=row[4], notif_count=0, ) @@ -1063,57 +1061,27 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas summaries[(row[0], row[1])] = _EventPushSummary( unread_count=0, stream_ordering=row[3], - old_user_id=row[4], notif_count=row[2], ) logger.info("Rotating notifications, handling %d rows", len(summaries)) - # If the `old.user_id` above is NULL then we know there isn't already an - # entry in the table, so we simply insert it. Otherwise we update the - # existing table. - self.db_pool.simple_insert_many_txn( + self.db_pool.simple_upsert_many_txn( txn, table="event_push_summary", - keys=( - "user_id", - "room_id", - "notif_count", - "unread_count", - "stream_ordering", - ), - values=[ + key_names=("user_id", "room_id"), + key_values=[(user_id, room_id) for user_id, room_id in summaries], + value_names=("notif_count", "unread_count", "stream_ordering"), + value_values=[ ( - user_id, - room_id, summary.notif_count, summary.unread_count, summary.stream_ordering, ) - for ((user_id, room_id), summary) in summaries.items() - if summary.old_user_id is None + for summary in summaries.values() ], ) - txn.execute_batch( - """ - UPDATE event_push_summary - SET notif_count = ?, unread_count = ?, stream_ordering = ? - WHERE user_id = ? AND room_id = ? - """, - ( - ( - summary.notif_count, - summary.unread_count, - summary.stream_ordering, - user_id, - room_id, - ) - for ((user_id, room_id), summary) in summaries.items() - if summary.old_user_id is not None - ), - ) - txn.execute( "UPDATE event_push_summary_stream_ordering SET stream_ordering = ?", (rotate_to_stream_ordering,), @@ -1293,5 +1261,4 @@ class _EventPushSummary: unread_count: int stream_ordering: int - old_user_id: str notif_count: int From 68695d80074f4d3bdf07970d541c07b98adffc76 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Tue, 5 Jul 2022 14:24:42 +0100 Subject: [PATCH 075/178] Factor out some common Complement CI setup commands to a script. (#13157) --- .ci/scripts/setup_complement_prerequisites.sh | 36 +++++++++++++++++++ .github/workflows/tests.yml | 25 ++----------- .github/workflows/twisted_trunk.yml | 27 ++------------ changelog.d/13157.misc | 1 + 4 files changed, 42 insertions(+), 47 deletions(-) create mode 100755 .ci/scripts/setup_complement_prerequisites.sh create mode 100644 changelog.d/13157.misc diff --git a/.ci/scripts/setup_complement_prerequisites.sh b/.ci/scripts/setup_complement_prerequisites.sh new file mode 100755 index 0000000000..4848901cbf --- /dev/null +++ b/.ci/scripts/setup_complement_prerequisites.sh @@ -0,0 +1,36 @@ +#!/bin/sh +# +# Common commands to set up Complement's prerequisites in a GitHub Actions CI run. +# +# Must be called after Synapse has been checked out to `synapse/`. 
+# +set -eu + +alias block='{ set +x; } 2>/dev/null; func() { echo "::group::$*"; set -x; }; func' +alias endblock='{ set +x; } 2>/dev/null; func() { echo "::endgroup::"; set -x; }; func' + +block Set Go Version + # The path is set via a file given by $GITHUB_PATH. We need both Go 1.17 and GOPATH on the path to run Complement. + # See https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-system-path + + # Add Go 1.17 to the PATH: see https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu2004-Readme.md#environment-variables-2 + echo "$GOROOT_1_17_X64/bin" >> $GITHUB_PATH + # Add the Go path to the PATH: We need this so we can call gotestfmt + echo "~/go/bin" >> $GITHUB_PATH +endblock + +block Install Complement Dependencies + sudo apt-get -qq update && sudo apt-get install -qqy libolm3 libolm-dev + go get -v github.com/haveyoudebuggedit/gotestfmt/v2/cmd/gotestfmt@latest +endblock + +block Install custom gotestfmt template + mkdir .gotestfmt/github -p + cp synapse/.ci/complement_package.gotpl .gotestfmt/github/package.gotpl +endblock + +block Check out Complement + # Attempt to check out the same branch of Complement as the PR. If it + # doesn't exist, fallback to HEAD. + synapse/.ci/scripts/checkout_complement.sh +endblock diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a775f70c4e..4bc29c8207 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -332,34 +332,13 @@ jobs: database: Postgres steps: - # The path is set via a file given by $GITHUB_PATH. We need both Go 1.17 and GOPATH on the path to run Complement. - # See https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-system-path - - name: "Set Go Version" - run: | - # Add Go 1.17 to the PATH: see https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu2004-Readme.md#environment-variables-2 - echo "$GOROOT_1_17_X64/bin" >> $GITHUB_PATH - # Add the Go path to the PATH: We need this so we can call gotestfmt - echo "~/go/bin" >> $GITHUB_PATH - - - name: "Install Complement Dependencies" - run: | - sudo apt-get -qq update && sudo apt-get install -qqy libolm3 libolm-dev - go get -v github.com/haveyoudebuggedit/gotestfmt/v2/cmd/gotestfmt@latest - - name: Run actions/checkout@v2 for synapse uses: actions/checkout@v2 with: path: synapse - - name: "Install custom gotestfmt template" - run: | - mkdir .gotestfmt/github -p - cp synapse/.ci/complement_package.gotpl .gotestfmt/github/package.gotpl - - # Attempt to check out the same branch of Complement as the PR. If it - # doesn't exist, fallback to HEAD. - - name: Checkout complement - run: synapse/.ci/scripts/checkout_complement.sh + - name: Prepare Complement's Prerequisites + run: synapse/.ci/scripts/setup_complement_prerequisites.sh - run: | set -o pipefail diff --git a/.github/workflows/twisted_trunk.yml b/.github/workflows/twisted_trunk.yml index 12267405be..f35e82297f 100644 --- a/.github/workflows/twisted_trunk.yml +++ b/.github/workflows/twisted_trunk.yml @@ -114,25 +114,14 @@ jobs: database: Postgres steps: - # The path is set via a file given by $GITHUB_PATH. We need both Go 1.17 and GOPATH on the path to run Complement. 
- # See https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-system-path - - name: "Set Go Version" - run: | - # Add Go 1.17 to the PATH: see https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu2004-Readme.md#environment-variables-2 - echo "$GOROOT_1_17_X64/bin" >> $GITHUB_PATH - # Add the Go path to the PATH: We need this so we can call gotestfmt - echo "~/go/bin" >> $GITHUB_PATH - - - name: "Install Complement Dependencies" - run: | - sudo apt-get update && sudo apt-get install -y libolm3 libolm-dev - go get -v github.com/haveyoudebuggedit/gotestfmt/v2/cmd/gotestfmt@latest - - name: Run actions/checkout@v2 for synapse uses: actions/checkout@v2 with: path: synapse + - name: Prepare Complement's Prerequisites + run: synapse/.ci/scripts/setup_complement_prerequisites.sh + # This step is specific to the 'Twisted trunk' test run: - name: Patch dependencies run: | @@ -146,16 +135,6 @@ jobs: # NOT IN 1.1.12 poetry lock --check working-directory: synapse - - name: "Install custom gotestfmt template" - run: | - mkdir .gotestfmt/github -p - cp synapse/.ci/complement_package.gotpl .gotestfmt/github/package.gotpl - - # Attempt to check out the same branch of Complement as the PR. If it - # doesn't exist, fallback to HEAD. - - name: Checkout complement - run: synapse/.ci/scripts/checkout_complement.sh - - run: | set -o pipefail TEST_ONLY_SKIP_DEP_HASH_VERIFICATION=1 POSTGRES=${{ (matrix.database == 'Postgres') && 1 || '' }} WORKERS=${{ (matrix.arrangement == 'workers') && 1 || '' }} COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | gotestfmt diff --git a/changelog.d/13157.misc b/changelog.d/13157.misc new file mode 100644 index 0000000000..0133097c83 --- /dev/null +++ b/changelog.d/13157.misc @@ -0,0 +1 @@ +Enable Complement testing in the 'Twisted Trunk' CI runs. \ No newline at end of file From 6ba732fefe732f92b0266b17cb6e45388bbe002a Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 5 Jul 2022 15:13:47 +0100 Subject: [PATCH 076/178] Type `tests.utils` (#13028) * Cast to postgres types when handling postgres db * Remove unused method * Easy annotations * Annotate create_room * Use `ParamSpec` to annotate looping_call * Annotate `default_config` * Track `now` as a float `time_ms` returns an int like the proper Synapse `Clock` * Introduce a `Timer` dataclass * Introduce a Looper type * Suppress checking of a mock * tests.utils is typed * Changelog * Whoops, import ParamSpec from typing_extensions * ditch the psycopg2 casts --- changelog.d/13028.misc | 1 + mypy.ini | 3 + synapse/util/__init__.py | 6 +- synapse/util/caches/lrucache.py | 2 +- tests/utils.py | 134 ++++++++++++++++++++++---------- 5 files changed, 101 insertions(+), 45 deletions(-) create mode 100644 changelog.d/13028.misc diff --git a/changelog.d/13028.misc b/changelog.d/13028.misc new file mode 100644 index 0000000000..4e5f3d8f91 --- /dev/null +++ b/changelog.d/13028.misc @@ -0,0 +1 @@ +Add type annotations to `tests.utils`. diff --git a/mypy.ini b/mypy.ini index d757a88fd1..ea0ab003a8 100644 --- a/mypy.ini +++ b/mypy.ini @@ -126,6 +126,9 @@ disallow_untyped_defs = True [mypy-tests.federation.transport.test_client] disallow_untyped_defs = True +[mypy-tests.utils] +disallow_untyped_defs = True + ;; Dependencies without annotations ;; Before ignoring a module, check to see if type stubs are available. 
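Much of the `synapse/util` and `tests/utils.py` typing below leans on `ParamSpec` to annotate functions that forward `*args`/`**kwargs` to a callback (`looping_call`, `call_later`, and the test `MockClock`). A minimal, self-contained sketch of the pattern, using made-up function names and independent of Synapse's `Clock`:

```python
from typing import Callable, TypeVar

from typing_extensions import ParamSpec

P = ParamSpec("P")
R = TypeVar("R")


def call_with(f: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
    # P ties *args/**kwargs to f's parameters, so mismatched arguments are
    # rejected by mypy at the call site rather than exploding at runtime.
    return f(*args, **kwargs)


def greet(name: str, punctuation: str = "!") -> str:
    return f"hello {name}{punctuation}"


print(call_with(greet, "world"))   # OK: prints "hello world!"
# call_with(greet, 123)            # mypy error: int is not compatible with str
```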
diff --git a/synapse/util/__init__.py b/synapse/util/__init__.py index 6323d452e7..a90f08dd4c 100644 --- a/synapse/util/__init__.py +++ b/synapse/util/__init__.py @@ -20,6 +20,7 @@ from typing import Any, Callable, Dict, Generator, Optional import attr from frozendict import frozendict from matrix_common.versionstring import get_distribution_version_string +from typing_extensions import ParamSpec from twisted.internet import defer, task from twisted.internet.defer import Deferred @@ -82,6 +83,9 @@ def unwrapFirstError(failure: Failure) -> Failure: return failure.value.subFailure # type: ignore[union-attr] # Issue in Twisted's annotations +P = ParamSpec("P") + + @attr.s(slots=True) class Clock: """ @@ -110,7 +114,7 @@ class Clock: return int(self.time() * 1000) def looping_call( - self, f: Callable, msec: float, *args: Any, **kwargs: Any + self, f: Callable[P, object], msec: float, *args: P.args, **kwargs: P.kwargs ) -> LoopingCall: """Call a function repeatedly. diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index a3b60578e3..8ed5325c5d 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -109,7 +109,7 @@ GLOBAL_ROOT = ListNode["_Node"].create_root_node() @wrap_as_background_process("LruCache._expire_old_entries") async def _expire_old_entries( - clock: Clock, expiry_seconds: int, autotune_config: Optional[dict] + clock: Clock, expiry_seconds: float, autotune_config: Optional[dict] ) -> None: """Walks the global cache list to find cache entries that haven't been accessed in the given number of seconds, or if a given memory threshold has been breached. diff --git a/tests/utils.py b/tests/utils.py index aca6a0083b..424cc4c2a0 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -15,12 +15,17 @@ import atexit import os +from typing import Any, Callable, Dict, List, Tuple, Union, overload + +import attr +from typing_extensions import Literal, ParamSpec from synapse.api.constants import EventTypes from synapse.api.room_versions import RoomVersions from synapse.config.homeserver import HomeServerConfig from synapse.config.server import DEFAULT_ROOM_VERSION from synapse.logging.context import current_context, set_current_context +from synapse.server import HomeServer from synapse.storage.database import LoggingDatabaseConnection from synapse.storage.engines import create_engine from synapse.storage.prepare_database import prepare_database @@ -50,12 +55,11 @@ SQLITE_PERSIST_DB = os.environ.get("SYNAPSE_TEST_PERSIST_SQLITE_DB") is not None POSTGRES_DBNAME_FOR_INITIAL_CREATE = "postgres" -def setupdb(): +def setupdb() -> None: # If we're using PostgreSQL, set up the db once if USE_POSTGRES_FOR_TESTS: # create a PostgresEngine db_engine = create_engine({"name": "psycopg2", "args": {}}) - # connect to postgres to create the base database. db_conn = db_engine.module.connect( user=POSTGRES_USER, @@ -82,11 +86,11 @@ def setupdb(): port=POSTGRES_PORT, password=POSTGRES_PASSWORD, ) - db_conn = LoggingDatabaseConnection(db_conn, db_engine, "tests") - prepare_database(db_conn, db_engine, None) - db_conn.close() + logging_conn = LoggingDatabaseConnection(db_conn, db_engine, "tests") + prepare_database(logging_conn, db_engine, None) + logging_conn.close() - def _cleanup(): + def _cleanup() -> None: db_conn = db_engine.module.connect( user=POSTGRES_USER, host=POSTGRES_HOST, @@ -103,7 +107,19 @@ def setupdb(): atexit.register(_cleanup) -def default_config(name, parse=False): +@overload +def default_config(name: str, parse: Literal[False] = ...) 
-> Dict[str, object]: + ... + + +@overload +def default_config(name: str, parse: Literal[True]) -> HomeServerConfig: + ... + + +def default_config( + name: str, parse: bool = False +) -> Union[Dict[str, object], HomeServerConfig]: """ Create a reasonable test config. """ @@ -181,90 +197,122 @@ def default_config(name, parse=False): return config_dict -def mock_getRawHeaders(headers=None): +def mock_getRawHeaders(headers=None): # type: ignore[no-untyped-def] headers = headers if headers is not None else {} - def getRawHeaders(name, default=None): + def getRawHeaders(name, default=None): # type: ignore[no-untyped-def] + # If the requested header is present, the real twisted function returns + # List[str] if name is a str and List[bytes] if name is a bytes. + # This mock doesn't support that behaviour. + # Fortunately, none of the current callers of mock_getRawHeaders() provide a + # headers dict, so we don't encounter this discrepancy in practice. return headers.get(name, default) return getRawHeaders +P = ParamSpec("P") + + +@attr.s(slots=True, auto_attribs=True) +class Timer: + absolute_time: float + callback: Callable[[], None] + expired: bool + + +# TODO: Make this generic over a ParamSpec? +@attr.s(slots=True, auto_attribs=True) +class Looper: + func: Callable[..., Any] + interval: float # seconds + last: float + args: Tuple[object, ...] + kwargs: Dict[str, object] + + class MockClock: - now = 1000 + now = 1000.0 - def __init__(self): - # list of lists of [absolute_time, callback, expired] in no particular - # order - self.timers = [] - self.loopers = [] + def __init__(self) -> None: + # Timers in no particular order + self.timers: List[Timer] = [] + self.loopers: List[Looper] = [] - def time(self): + def time(self) -> float: return self.now - def time_msec(self): - return self.time() * 1000 + def time_msec(self) -> int: + return int(self.time() * 1000) - def call_later(self, delay, callback, *args, **kwargs): + def call_later( + self, + delay: float, + callback: Callable[P, object], + *args: P.args, + **kwargs: P.kwargs, + ) -> Timer: ctx = current_context() - def wrapped_callback(): + def wrapped_callback() -> None: set_current_context(ctx) callback(*args, **kwargs) - t = [self.now + delay, wrapped_callback, False] + t = Timer(self.now + delay, wrapped_callback, False) self.timers.append(t) return t - def looping_call(self, function, interval, *args, **kwargs): - self.loopers.append([function, interval / 1000.0, self.now, args, kwargs]) + def looping_call( + self, + function: Callable[P, object], + interval: float, + *args: P.args, + **kwargs: P.kwargs, + ) -> None: + # This type-ignore should be redundant once we use a mypy release with + # https://github.com/python/mypy/pull/12668. 
+ self.loopers.append(Looper(function, interval / 1000.0, self.now, args, kwargs)) # type: ignore[arg-type] - def cancel_call_later(self, timer, ignore_errs=False): - if timer[2]: + def cancel_call_later(self, timer: Timer, ignore_errs: bool = False) -> None: + if timer.expired: if not ignore_errs: raise Exception("Cannot cancel an expired timer") - timer[2] = True + timer.expired = True self.timers = [t for t in self.timers if t != timer] # For unit testing - def advance_time(self, secs): + def advance_time(self, secs: float) -> None: self.now += secs timers = self.timers self.timers = [] for t in timers: - time, callback, expired = t - - if expired: + if t.expired: raise Exception("Timer already expired") - if self.now >= time: - t[2] = True - callback() + if self.now >= t.absolute_time: + t.expired = True + t.callback() else: self.timers.append(t) for looped in self.loopers: - func, interval, last, args, kwargs = looped - if last + interval < self.now: - func(*args, **kwargs) - looped[2] = self.now + if looped.last + looped.interval < self.now: + looped.func(*looped.args, **looped.kwargs) + looped.last = self.now - def advance_time_msec(self, ms): + def advance_time_msec(self, ms: float) -> None: self.advance_time(ms / 1000.0) - def time_bound_deferred(self, d, *args, **kwargs): - # We don't bother timing things out for now. - return d - -async def create_room(hs, room_id: str, creator_id: str): +async def create_room(hs: HomeServer, room_id: str, creator_id: str) -> None: """Creates and persist a creation event for the given room""" persistence_store = hs.get_storage_controllers().persistence + assert persistence_store is not None store = hs.get_datastores().main event_builder_factory = hs.get_event_builder_factory() event_creation_handler = hs.get_event_creation_handler() From 68db233f0cf16a20f21fd927374121966976d9c7 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Tue, 5 Jul 2022 16:12:52 +0100 Subject: [PATCH 077/178] Handle race between persisting an event and un-partial stating a room (#13100) Whenever we want to persist an event, we first compute an event context, which includes the state at the event and a flag indicating whether the state is partial. After a lot of processing, we finally try to store the event in the database, which can fail for partial state events when the containing room has been un-partial stated in the meantime. We detect the race as a foreign key constraint failure in the data store layer and turn it into a special `PartialStateConflictError` exception, which makes its way up to the method in which we computed the event context. To make things difficult, the exception needs to cross a replication request: `/fed_send_events` for events coming over federation and `/send_event` for events from clients. We transport the `PartialStateConflictError` as a `409 Conflict` over replication and turn `409`s back into `PartialStateConflictError`s on the worker making the request. All client events go through `EventCreationHandler.handle_new_client_event`, which is called in *a lot* of places. Instead of trying to update all the code which creates client events, we turn the `PartialStateConflictError` into a `429 Too Many Requests` in `EventCreationHandler.handle_new_client_event` and hope that clients take it as a hint to retry their request. On the federation event side, there are 7 places which compute event contexts. 
4 of them use outlier event contexts: `FederationEventHandler._auth_and_persist_outliers_inner`, `FederationHandler.do_knock`, `FederationHandler.on_invite_request` and `FederationHandler.do_remotely_reject_invite`. These events won't have the partial state flag, so we do not need to do anything for then. The remaining 3 paths which create events are `FederationEventHandler.process_remote_join`, `FederationEventHandler.on_send_membership_event` and `FederationEventHandler._process_received_pdu`. We can't experience the race in `process_remote_join`, unless we're handling an additional join into a partial state room, which currently blocks, so we make no attempt to handle it correctly. `on_send_membership_event` is only called by `FederationServer._on_send_membership_event`, so we catch the `PartialStateConflictError` there and retry just once. `_process_received_pdu` is called by `on_receive_pdu` for incoming events and `_process_pulled_event` for backfill. The latter should never try to persist partial state events, so we ignore it. We catch the `PartialStateConflictError` in `on_receive_pdu` and retry just once. Refering to the graph of code paths in https://github.com/matrix-org/synapse/issues/12988#issuecomment-1156857648 may make the above make more sense. Signed-off-by: Sean Quah --- changelog.d/13100.misc | 1 + synapse/federation/federation_server.py | 18 ++++- synapse/handlers/federation.py | 39 +++++---- synapse/handlers/federation_event.py | 51 +++++++++--- synapse/handlers/message.py | 79 ++++++++++++------ synapse/replication/http/federation.py | 3 + synapse/replication/http/send_event.py | 3 + synapse/storage/controllers/persist_events.py | 12 +++ synapse/storage/databases/main/events.py | 80 ++++++++++++++++--- synapse/storage/databases/main/room.py | 22 +++-- 10 files changed, 234 insertions(+), 74 deletions(-) create mode 100644 changelog.d/13100.misc diff --git a/changelog.d/13100.misc b/changelog.d/13100.misc new file mode 100644 index 0000000000..28f2fe0349 --- /dev/null +++ b/changelog.d/13100.misc @@ -0,0 +1 @@ +Faster room joins: Handle race between persisting an event and un-partial stating a room. diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 3e1518f1f6..5dfdc86740 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -67,6 +67,7 @@ from synapse.replication.http.federation import ( ReplicationFederationSendEduRestServlet, ReplicationGetQueryRestServlet, ) +from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.lock import Lock from synapse.types import JsonDict, StateMap, get_domain_from_id from synapse.util import json_decoder, unwrapFirstError @@ -882,9 +883,20 @@ class FederationServer(FederationBase): logger.warning("%s", errmsg) raise SynapseError(403, errmsg, Codes.FORBIDDEN) - return await self._federation_event_handler.on_send_membership_event( - origin, event - ) + try: + return await self._federation_event_handler.on_send_membership_event( + origin, event + ) + except PartialStateConflictError: + # The room was un-partial stated while we were persisting the event. + # Try once more, with full state this time. 
+ logger.info( + "Room %s was un-partial stated during `on_send_membership_event`, trying again.", + room_id, + ) + return await self._federation_event_handler.on_send_membership_event( + origin, event + ) async def on_event_auth( self, origin: str, room_id: str, event_id: str diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 34cc5ecd11..3c44b4bf86 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -45,6 +45,7 @@ from synapse.api.errors import ( FederationDeniedError, FederationError, HttpResponseException, + LimitExceededError, NotFoundError, RequestSendFailed, SynapseError, @@ -64,6 +65,7 @@ from synapse.replication.http.federation import ( ReplicationCleanRoomRestServlet, ReplicationStoreRoomOnOutlierMembershipRestServlet, ) +from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.state import StateFilter from synapse.types import JsonDict, StateMap, get_domain_from_id @@ -549,15 +551,29 @@ class FederationHandler: # https://github.com/matrix-org/synapse/issues/12998 await self.store.store_partial_state_room(room_id, ret.servers_in_room) - max_stream_id = await self._federation_event_handler.process_remote_join( - origin, - room_id, - auth_chain, - state, - event, - room_version_obj, - partial_state=ret.partial_state, - ) + try: + max_stream_id = ( + await self._federation_event_handler.process_remote_join( + origin, + room_id, + auth_chain, + state, + event, + room_version_obj, + partial_state=ret.partial_state, + ) + ) + except PartialStateConflictError as e: + # The homeserver was already in the room and it is no longer partial + # stated. We ought to be doing a local join instead. Turn the error into + # a 429, as a hint to the client to try again. + # TODO(faster_joins): `_should_perform_remote_join` suggests that we may + # do a remote join for restricted rooms even if we have full state. + logger.error( + "Room %s was un-partial stated while processing remote join.", + room_id, + ) + raise LimitExceededError(msg=e.msg, errcode=e.errcode, retry_after_ms=0) if ret.partial_state: # Kick off the process of asynchronously fetching the state for this @@ -1567,11 +1583,6 @@ class FederationHandler: # we raced against more events arriving with partial state. Go round # the loop again. We've already logged a warning, so no need for more. - # TODO(faster_joins): there is still a race here, whereby incoming events which raced - # with us will fail to be persisted after the call to `clear_partial_state_room` due to - # having partial state. 
- # https://github.com/matrix-org/synapse/issues/12988 - # continue events = await self.store.get_events_as_list( diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 479d936dc0..c74117c19a 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -64,6 +64,7 @@ from synapse.replication.http.federation import ( ReplicationFederationSendEventsRestServlet, ) from synapse.state import StateResolutionStore +from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.state import StateFilter from synapse.types import ( @@ -275,7 +276,16 @@ class FederationEventHandler: affected=pdu.event_id, ) - await self._process_received_pdu(origin, pdu, state_ids=None) + try: + await self._process_received_pdu(origin, pdu, state_ids=None) + except PartialStateConflictError: + # The room was un-partial stated while we were processing the PDU. + # Try once more, with full state this time. + logger.info( + "Room %s was un-partial stated while processing the PDU, trying again.", + room_id, + ) + await self._process_received_pdu(origin, pdu, state_ids=None) async def on_send_membership_event( self, origin: str, event: EventBase @@ -306,6 +316,9 @@ class FederationEventHandler: Raises: SynapseError if the event is not accepted into the room + PartialStateConflictError if the room was un-partial stated in between + computing the state at the event and persisting it. The caller should + retry exactly once in this case. """ logger.debug( "on_send_membership_event: Got event: %s, signatures: %s", @@ -423,6 +436,8 @@ class FederationEventHandler: Raises: SynapseError if the response is in some way invalid. + PartialStateConflictError if the homeserver is already in the room and it + has been un-partial stated. """ create_event = None for e in state: @@ -1084,10 +1099,14 @@ class FederationEventHandler: state_ids: Normally None, but if we are handling a gap in the graph (ie, we are missing one or more prev_events), the resolved state at the - event + event. Must not be partial state. backfilled: True if this is part of a historical batch of events (inhibits notification to clients, and validation of device keys.) + + PartialStateConflictError: if the room was un-partial stated in between + computing the state at the event and persisting it. The caller should retry + exactly once in this case. Will never be raised if `state_ids` is provided. """ logger.debug("Processing event: %s", event) assert not event.internal_metadata.outlier @@ -1933,6 +1952,9 @@ class FederationEventHandler: event: The event itself. context: The event context. backfilled: True if the event was backfilled. + + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ # this method should not be called on outliers (those code paths call # persist_events_and_notify directly.) @@ -1985,6 +2007,10 @@ class FederationEventHandler: Returns: The stream ID after which all events have been persisted. + + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ if not event_and_contexts: return self._store.get_room_max_stream_ordering() @@ -1993,14 +2019,19 @@ class FederationEventHandler: if instance != self._instance_name: # Limit the number of events sent over replication. 
We choose 200 # here as that is what we default to in `max_request_body_size(..)` - for batch in batch_iter(event_and_contexts, 200): - result = await self._send_events( - instance_name=instance, - store=self._store, - room_id=room_id, - event_and_contexts=batch, - backfilled=backfilled, - ) + try: + for batch in batch_iter(event_and_contexts, 200): + result = await self._send_events( + instance_name=instance, + store=self._store, + room_id=room_id, + event_and_contexts=batch, + backfilled=backfilled, + ) + except SynapseError as e: + if e.code == HTTPStatus.CONFLICT: + raise PartialStateConflictError() + raise return result["max_stream_id"] else: assert self._storage_controllers.persistence diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index c6b40a5b7a..1980e37dae 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -37,6 +37,7 @@ from synapse.api.errors import ( AuthError, Codes, ConsentNotGivenError, + LimitExceededError, NotFoundError, ShadowBanError, SynapseError, @@ -53,6 +54,7 @@ from synapse.handlers.directory import DirectoryHandler from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.metrics.background_process_metrics import run_as_background_process from synapse.replication.http.send_event import ReplicationSendEventRestServlet +from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.state import StateFilter from synapse.types import ( @@ -1250,6 +1252,8 @@ class EventCreationHandler: Raises: ShadowBanError if the requester has been shadow-banned. + SynapseError(503) if attempting to persist a partial state event in + a room that has been un-partial stated. """ extra_users = extra_users or [] @@ -1300,24 +1304,35 @@ class EventCreationHandler: # We now persist the event (and update the cache in parallel, since we # don't want to block on it). - result, _ = await make_deferred_yieldable( - gather_results( - ( - run_in_background( - self._persist_event, - requester=requester, - event=event, - context=context, - ratelimit=ratelimit, - extra_users=extra_users, + try: + result, _ = await make_deferred_yieldable( + gather_results( + ( + run_in_background( + self._persist_event, + requester=requester, + event=event, + context=context, + ratelimit=ratelimit, + extra_users=extra_users, + ), + run_in_background( + self.cache_joined_hosts_for_event, event, context + ).addErrback( + log_failure, "cache_joined_hosts_for_event failed" + ), ), - run_in_background( - self.cache_joined_hosts_for_event, event, context - ).addErrback(log_failure, "cache_joined_hosts_for_event failed"), - ), - consumeErrors=True, + consumeErrors=True, + ) + ).addErrback(unwrapFirstError) + except PartialStateConflictError as e: + # The event context needs to be recomputed. + # Turn the error into a 429, as a hint to the client to try again. + logger.info( + "Room %s was un-partial stated while persisting client event.", + event.room_id, ) - ).addErrback(unwrapFirstError) + raise LimitExceededError(msg=e.msg, errcode=e.errcode, retry_after_ms=0) return result @@ -1332,6 +1347,9 @@ class EventCreationHandler: """Actually persists the event. Should only be called by `handle_new_client_event`, and see its docstring for documentation of the arguments. + + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. 
""" # Skip push notification actions for historical messages @@ -1348,16 +1366,21 @@ class EventCreationHandler: # If we're a worker we need to hit out to the master. writer_instance = self._events_shard_config.get_instance(event.room_id) if writer_instance != self._instance_name: - result = await self.send_event( - instance_name=writer_instance, - event_id=event.event_id, - store=self.store, - requester=requester, - event=event, - context=context, - ratelimit=ratelimit, - extra_users=extra_users, - ) + try: + result = await self.send_event( + instance_name=writer_instance, + event_id=event.event_id, + store=self.store, + requester=requester, + event=event, + context=context, + ratelimit=ratelimit, + extra_users=extra_users, + ) + except SynapseError as e: + if e.code == HTTPStatus.CONFLICT: + raise PartialStateConflictError() + raise stream_id = result["stream_id"] event_id = result["event_id"] if event_id != event.event_id: @@ -1485,6 +1508,10 @@ class EventCreationHandler: The persisted event. This may be different than the given event if it was de-duplicated (e.g. because we had already persisted an event with the same transaction ID.) + + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ extra_users = extra_users or [] diff --git a/synapse/replication/http/federation.py b/synapse/replication/http/federation.py index eed29cd597..d3abafed28 100644 --- a/synapse/replication/http/federation.py +++ b/synapse/replication/http/federation.py @@ -60,6 +60,9 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): { "max_stream_id": 32443, } + + Responds with a 409 when a `PartialStateConflictError` is raised due to an event + context that needs to be recomputed due to the un-partial stating of a room. """ NAME = "fed_send_events" diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index c2b2588ea5..486f04723c 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -59,6 +59,9 @@ class ReplicationSendEventRestServlet(ReplicationEndpoint): { "stream_id": 12345, "event_id": "$abcdef..." } + Responds with a 409 when a `PartialStateConflictError` is raised due to an event + context that needs to be recomputed due to the un-partial stating of a room. + The returned event ID may not match the sent event if it was deduplicated. """ diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index 4bcb99d06e..c248fccc81 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -315,6 +315,10 @@ class EventsPersistenceStorageController: if they were deduplicated due to an event already existing that matched the transaction ID; the existing event is returned in such a case. + + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ partitioned: Dict[str, List[Tuple[EventBase, EventContext]]] = {} for event, ctx in events_and_contexts: @@ -363,6 +367,10 @@ class EventsPersistenceStorageController: latest persisted event. The returned event may not match the given event if it was deduplicated due to an existing event matching the transaction ID. + + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. 
""" # add_to_queue returns a map from event ID to existing event ID if the # event was deduplicated. (The dict may also include other entries if @@ -453,6 +461,10 @@ class EventsPersistenceStorageController: Returns: A dictionary of event ID to event ID we didn't persist as we already had another event persisted with the same TXN ID. + + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ replaced_events: Dict[str, str] = {} if not events_and_contexts: diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index a3e12f1e9b..8a0e4e9589 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -16,6 +16,7 @@ import itertools import logging from collections import OrderedDict +from http import HTTPStatus from typing import ( TYPE_CHECKING, Any, @@ -35,6 +36,7 @@ from prometheus_client import Counter import synapse.metrics from synapse.api.constants import EventContentFields, EventTypes, RelationTypes +from synapse.api.errors import Codes, SynapseError from synapse.api.room_versions import RoomVersions from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext @@ -69,6 +71,24 @@ event_counter = Counter( ) +class PartialStateConflictError(SynapseError): + """An internal error raised when attempting to persist an event with partial state + after the room containing the event has been un-partial stated. + + This error should be handled by recomputing the event context and trying again. + + This error has an HTTP status code so that it can be transported over replication. + It should not be exposed to clients. + """ + + def __init__(self) -> None: + super().__init__( + HTTPStatus.CONFLICT, + msg="Cannot persist partial state event in un-partial stated room", + errcode=Codes.UNKNOWN, + ) + + @attr.s(slots=True, auto_attribs=True) class DeltaState: """Deltas to use to update the `current_state_events` table. @@ -154,6 +174,10 @@ class PersistEventsStore: Returns: Resolves when the events have been persisted + + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ # We want to calculate the stream orderings as late as possible, as @@ -354,6 +378,9 @@ class PersistEventsStore: For each room, a list of the event ids which are the forward extremities. + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ state_delta_for_room = state_delta_for_room or {} new_forward_extremities = new_forward_extremities or {} @@ -1304,6 +1331,10 @@ class PersistEventsStore: Returns: new list, without events which are already in the events table. + + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ txn.execute( "SELECT event_id, outlier FROM events WHERE event_id in (%s)" @@ -2215,6 +2246,11 @@ class PersistEventsStore: txn: LoggingTransaction, events_and_contexts: Collection[Tuple[EventBase, EventContext]], ) -> None: + """ + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. + """ state_groups = {} for event, context in events_and_contexts: if event.internal_metadata.is_outlier(): @@ -2239,19 +2275,37 @@ class PersistEventsStore: # if we have partial state for these events, record the fact. 
(This happens # here rather than in _store_event_txn because it also needs to happen when # we de-outlier an event.) - self.db_pool.simple_insert_many_txn( - txn, - table="partial_state_events", - keys=("room_id", "event_id"), - values=[ - ( - event.room_id, - event.event_id, - ) - for event, ctx in events_and_contexts - if ctx.partial_state - ], - ) + try: + self.db_pool.simple_insert_many_txn( + txn, + table="partial_state_events", + keys=("room_id", "event_id"), + values=[ + ( + event.room_id, + event.event_id, + ) + for event, ctx in events_and_contexts + if ctx.partial_state + ], + ) + except self.db_pool.engine.module.IntegrityError: + logger.info( + "Cannot persist events %s in rooms %s: room has been un-partial stated", + [ + event.event_id + for event, ctx in events_and_contexts + if ctx.partial_state + ], + list( + { + event.room_id + for event, ctx in events_and_contexts + if ctx.partial_state + } + ), + ) + raise PartialStateConflictError() self.db_pool.simple_upsert_many_txn( txn, diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index d8026e3fac..13d6a1d5c0 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1156,19 +1156,25 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): return room_servers async def clear_partial_state_room(self, room_id: str) -> bool: - # this can race with incoming events, so we watch out for FK errors. - # TODO(faster_joins): this still doesn't completely fix the race, since the persist process - # is not atomic. I fear we need an application-level lock. - # https://github.com/matrix-org/synapse/issues/12988 + """Clears the partial state flag for a room. + + Args: + room_id: The room whose partial state flag is to be cleared. + + Returns: + `True` if the partial state flag has been cleared successfully. + + `False` if the partial state flag could not be cleared because the room + still contains events with partial state. + """ try: await self.db_pool.runInteraction( "clear_partial_state_room", self._clear_partial_state_room_txn, room_id ) return True - except self.db_pool.engine.module.DatabaseError as e: - # TODO(faster_joins): how do we distinguish between FK errors and other errors? - # https://github.com/matrix-org/synapse/issues/12988 - logger.warning( + except self.db_pool.engine.module.IntegrityError as e: + # Assume that any `IntegrityError`s are due to partial state events. + logger.info( "Exception while clearing lazy partial-state-room %s, retrying: %s", room_id, e, From a0f51b059c2aa1bbe0a2d6991c369cba5cf43c0a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 6 Jul 2022 12:09:19 +0100 Subject: [PATCH 078/178] Fix bug where we failed to delete old push actions (#13194) This happened if we encountered a stream ordering in `event_push_actions` that had more rows than the batch size of the delete, as If we don't delete any rows in an iteration then the next time round we get the exact same stream ordering and get stuck. --- changelog.d/13194.bugfix | 1 + synapse/storage/databases/main/event_push_actions.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13194.bugfix diff --git a/changelog.d/13194.bugfix b/changelog.d/13194.bugfix new file mode 100644 index 0000000000..2c2e8bb21b --- /dev/null +++ b/changelog.d/13194.bugfix @@ -0,0 +1 @@ +Fix bug where rows were not deleted from `event_push_actions` table on large servers. Introduced in v1.62.0. 
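To see how the exclusive bound could wedge the cleanup fixed below: each pass picks the stream ordering `batch_size` rows in (the `LIMIT 1 OFFSET ?` query) and then deletes everything strictly below it, so if a single stream ordering accounts for more than `batch_size` rows the boundary never advances and nothing is ever deleted. A toy model of the two variants, with plain Python integers standing in for `event_push_actions.stream_ordering` values:

```python
from typing import List


def surviving_rows(rows: List[int], batch_size: int, inclusive: bool) -> List[int]:
    """Rows left behind after one deletion pass of this toy model."""
    ordered = sorted(rows)
    if len(ordered) <= batch_size:
        return []  # fewer rows than a full batch: treat as the final pass
    boundary = ordered[batch_size]  # "... ORDER BY stream_ordering ASC LIMIT 1 OFFSET ?"
    if inclusive:
        return [r for r in rows if r > boundary]   # new: WHERE stream_ordering <= ?
    return [r for r in rows if r >= boundary]      # old: WHERE stream_ordering < ?


rows = [7] * 10  # ten rows sharing a single stream ordering
assert surviving_rows(rows, batch_size=3, inclusive=False) == rows  # stuck forever
assert surviving_rows(rows, batch_size=3, inclusive=True) == []     # backlog cleared
```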
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 32536430aa..a3edcbb398 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -1114,7 +1114,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas txn.execute( """ SELECT stream_ordering FROM event_push_actions - WHERE stream_ordering < ? AND highlight = 0 + WHERE stream_ordering <= ? AND highlight = 0 ORDER BY stream_ordering ASC LIMIT 1 OFFSET ? """, ( @@ -1129,10 +1129,12 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas else: stream_ordering = max_stream_ordering_to_delete + # We need to use a inclusive bound here to handle the case where a + # single stream ordering has more than `batch_size` rows. txn.execute( """ DELETE FROM event_push_actions - WHERE stream_ordering < ? AND highlight = 0 + WHERE stream_ordering <= ? AND highlight = 0 """, (stream_ordering,), ) From dcc7873700da4a818e84c44c6190525d39a854cb Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 6 Jul 2022 07:30:58 -0400 Subject: [PATCH 079/178] Add information on how the Synapse team does reviews. (#13132) --- changelog.d/13132.doc | 1 + docs/SUMMARY.md | 1 + docs/development/contributing_guide.md | 5 +++- docs/development/reviews.md | 41 ++++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13132.doc create mode 100644 docs/development/reviews.md diff --git a/changelog.d/13132.doc b/changelog.d/13132.doc new file mode 100644 index 0000000000..c577069294 --- /dev/null +++ b/changelog.d/13132.doc @@ -0,0 +1 @@ +Document how the Synapse team does reviews. diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 3978f96fc3..8d6030e34a 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -81,6 +81,7 @@ # Development - [Contributing Guide](development/contributing_guide.md) - [Code Style](code_style.md) + - [Reviewing Code](development/reviews.md) - [Release Cycle](development/releases.md) - [Git Usage](development/git.md) - [Testing]() diff --git a/docs/development/contributing_guide.md b/docs/development/contributing_guide.md index 900369b80f..ab320cbd78 100644 --- a/docs/development/contributing_guide.md +++ b/docs/development/contributing_guide.md @@ -351,7 +351,7 @@ To prepare a Pull Request, please: 3. `git push` your commit to your fork of Synapse; 4. on GitHub, [create the Pull Request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request); 5. add a [changelog entry](#changelog) and push it to your Pull Request; -6. for most contributors, that's all - however, if you are a member of the organization `matrix-org`, on GitHub, please request a review from `matrix.org / Synapse Core`. +6. that's it for now, a non-draft pull request will automatically request review from the team; 7. if you need to update your PR, please avoid rebasing and just add new commits to your branch. @@ -527,10 +527,13 @@ From this point, you should: 1. Look at the results of the CI pipeline. - If there is any error, fix the error. 2. If a developer has requested changes, make these changes and let us know if it is ready for a developer to review again. + - A pull request is a conversation, if you disagree with the suggestions, please respond and discuss it. 3. Create a new commit with the changes. - Please do NOT overwrite the history. New commits make the reviewer's life easier. 
- Push this commits to your Pull Request. 4. Back to 1. +5. Once the pull request is ready for review again please re-request review from whichever developer did your initial + review (or leave a comment in the pull request that you believe all required changes have been done). Once both the CI and the developers are happy, the patch will be merged into Synapse and released shortly! diff --git a/docs/development/reviews.md b/docs/development/reviews.md new file mode 100644 index 0000000000..d0379949cb --- /dev/null +++ b/docs/development/reviews.md @@ -0,0 +1,41 @@ +Some notes on how we do reviews +=============================== + +The Synapse team works off a shared review queue -- any new pull requests for +Synapse (or related projects) has a review requested from the entire team. Team +members should process this queue using the following rules: + +* Any high urgency pull requests (e.g. fixes for broken continuous integration + or fixes for release blockers); +* Follow-up reviews for pull requests which have previously received reviews; +* Any remaining pull requests. + +For the latter two categories above, older pull requests should be prioritised. + +It is explicit that there is no priority given to pull requests from the team +(vs from the community). If a pull request requires a quick turn around, please +explicitly communicate this via [#synapse-dev:matrix.org](https://matrix.to/#/#synapse-dev:matrix.org) +or as a comment on the pull request. + +Once an initial review has been completed and the author has made additional changes, +follow-up reviews should go back to the same reviewer. This helps build a shared +context and conversation between author and reviewer. + +As a team we aim to keep the number of inflight pull requests to a minimum to ensure +that ongoing work is finished before starting new work. + +Performing a review +------------------- + +To communicate to the rest of the team the status of each pull request, team +members should do the following: + +* Assign themselves to the pull request (they should be left assigned to the + pull request until it is merged, closed, or are no longer the reviewer); +* Review the pull request by leaving comments, questions, and suggestions; +* Mark the pull request appropriately (as needing changes or accepted). + +If you are unsure about a particular part of the pull request (or are not confident +in your understanding of part of the code) then ask questions or request review +from the team again. When requesting review from the team be sure to leave a comment +with the rationale on why you're putting it back in the queue. From 57f6f59e3eacac61038419639f234e1eb1f230ed Mon Sep 17 00:00:00 2001 From: David Teller Date: Thu, 7 Jul 2022 10:14:32 +0200 Subject: [PATCH 080/178] Make `_get_state_map_for_room` not break when room state events don't contain an event id. (#13174) Method `_get_state_map_for_room` seems to break in presence of some ill-formed events in the database. Reimplementing this method to use `get_current_state`, which is more robust to such events. --- changelog.d/13174.bugfix | 1 + synapse/events/third_party_rules.py | 9 +-------- 2 files changed, 2 insertions(+), 8 deletions(-) create mode 100644 changelog.d/13174.bugfix diff --git a/changelog.d/13174.bugfix b/changelog.d/13174.bugfix new file mode 100644 index 0000000000..b17935b93f --- /dev/null +++ b/changelog.d/13174.bugfix @@ -0,0 +1 @@ +Make use of the more robust `get_current_state` in `_get_state_map_for_room` to avoid breakages. 
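The fragility removed below comes from the old two-step lookup: fetch the current state event IDs, bulk-load those events, then index into the result. If any referenced event cannot be loaded (the ill-formed events mentioned above), the bulk load simply omits it and the indexing fails. A tiny sketch of that failure mode, with made-up values:

```python
# Made-up data: one current-state entry points at an event that the bulk
# get_events() lookup cannot return, so it is absent from the result dict.
state_ids = {
    ("m.room.create", ""): "$create",
    ("m.room.member", "@alice:example.org"): "$broken",
}
room_state_events = {"$create": {"type": "m.room.create"}}

state_events = {}
try:
    for key, event_id in state_ids.items():
        state_events[key] = room_state_events[event_id]
except KeyError as missing:
    print(f"old implementation falls over on {missing}")  # -> '$broken'
```

Delegating to `get_current_state`, as the diff below does, sidesteps the manual indexing entirely.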
diff --git a/synapse/events/third_party_rules.py b/synapse/events/third_party_rules.py index 35f3f3690f..72ab696898 100644 --- a/synapse/events/third_party_rules.py +++ b/synapse/events/third_party_rules.py @@ -464,14 +464,7 @@ class ThirdPartyEventRules: Returns: A dict mapping (event type, state key) to state event. """ - state_ids = await self._storage_controllers.state.get_current_state_ids(room_id) - room_state_events = await self.store.get_events(state_ids.values()) - - state_events = {} - for key, event_id in state_ids.items(): - state_events[key] = room_state_events[event_id] - - return state_events + return await self._storage_controllers.state.get_current_state(room_id) async def on_profile_update( self, user_id: str, new_profile: ProfileInfo, by_admin: bool, deactivation: bool From fb7d24ab6de870ab21f83d49d9f1db569eff4b56 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Thu, 7 Jul 2022 11:08:04 +0100 Subject: [PATCH 081/178] Check that `auto_vacuum` is disabled when porting a SQLite database to Postgres, as `VACUUM`s must not be performed between runs of the script. (#13195) --- changelog.d/13195.misc | 1 + docs/postgres.md | 8 +++++++ synapse/_scripts/synapse_port_db.py | 34 +++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+) create mode 100644 changelog.d/13195.misc diff --git a/changelog.d/13195.misc b/changelog.d/13195.misc new file mode 100644 index 0000000000..5506f767b3 --- /dev/null +++ b/changelog.d/13195.misc @@ -0,0 +1 @@ +Check that `auto_vacuum` is disabled when porting a SQLite database to Postgres, as `VACUUM`s must not be performed between runs of the script. \ No newline at end of file diff --git a/docs/postgres.md b/docs/postgres.md index cbc32e1836..f2519f6b0a 100644 --- a/docs/postgres.md +++ b/docs/postgres.md @@ -143,6 +143,14 @@ to do step 2. It is safe to at any time kill the port script and restart it. +However, under no circumstances should the SQLite database be `VACUUM`ed between +multiple runs of the script. Doing so can lead to an inconsistent copy of your database +into Postgres. +To avoid accidental error, the script will check that SQLite's `auto_vacuum` mechanism +is disabled, but the script is not able to protect against a manual `VACUUM` operation +performed either by the administrator or by any automated task that the administrator +may have configured. + Note that the database may take up significantly more (25% - 100% more) space on disk after porting to Postgres. diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index d3b4887f69..642fd41629 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -621,6 +621,25 @@ class Porter: self.postgres_store.db_pool.updates.has_completed_background_updates() ) + @staticmethod + def _is_sqlite_autovacuum_enabled(txn: LoggingTransaction) -> bool: + """ + Returns true if auto_vacuum is enabled in SQLite. + https://www.sqlite.org/pragma.html#pragma_auto_vacuum + + Vacuuming changes the rowids on rows in the database. + Auto-vacuuming is therefore dangerous when used in conjunction with this script. + + Note that the auto_vacuum setting can't be changed without performing + a VACUUM after trying to change the pragma. + """ + txn.execute("PRAGMA auto_vacuum") + row = txn.fetchone() + assert row is not None, "`PRAGMA auto_vacuum` did not give a row." + (autovacuum_setting,) = row + # 0 means off. 1 means full. 2 means incremental. 
+ return autovacuum_setting != 0 + async def run(self) -> None: """Ports the SQLite database to a PostgreSQL database. @@ -637,6 +656,21 @@ class Porter: allow_outdated_version=True, ) + # For safety, ensure auto_vacuums are disabled. + if await self.sqlite_store.db_pool.runInteraction( + "is_sqlite_autovacuum_enabled", self._is_sqlite_autovacuum_enabled + ): + end_error = ( + "auto_vacuum is enabled in the SQLite database." + " (This is not the default configuration.)\n" + " This script relies on rowids being consistent and must not" + " be used if the database could be vacuumed between re-runs.\n" + " To disable auto_vacuum, you need to stop Synapse and run the following SQL:\n" + " PRAGMA auto_vacuum=off;\n" + " VACUUM;" + ) + return + # Check if all background updates are done, abort if not. updates_complete = ( await self.sqlite_store.db_pool.updates.has_completed_background_updates() From 4aaeb87dad274e0f67a77917b6cec88b778425cc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 7 Jul 2022 10:56:52 +0000 Subject: [PATCH 082/178] Bump lxml from 4.8.0 to 4.9.1 (#13207) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: David Robertson --- changelog.d/13207.docker | 1 + poetry.lock | 133 +++++++++++++++++++++------------------ 2 files changed, 72 insertions(+), 62 deletions(-) create mode 100644 changelog.d/13207.docker diff --git a/changelog.d/13207.docker b/changelog.d/13207.docker new file mode 100644 index 0000000000..63ba5c8031 --- /dev/null +++ b/changelog.d/13207.docker @@ -0,0 +1 @@ +Bump the version of `lxml` in matrix.org Docker images Debian packages from 4.8.0 to 4.9.1. diff --git a/poetry.lock b/poetry.lock index f069f692d5..b7c0a6869a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -502,7 +502,7 @@ pyasn1 = ">=0.4.6" [[package]] name = "lxml" -version = "4.8.0" +version = "4.9.1" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." 
category = "main" optional = true @@ -1937,67 +1937,76 @@ ldap3 = [ {file = "ldap3-2.9.1.tar.gz", hash = "sha256:f3e7fc4718e3f09dda568b57100095e0ce58633bcabbed8667ce3f8fbaa4229f"}, ] lxml = [ - {file = "lxml-4.8.0-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:e1ab2fac607842ac36864e358c42feb0960ae62c34aa4caaf12ada0a1fb5d99b"}, - {file = "lxml-4.8.0-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:28d1af847786f68bec57961f31221125c29d6f52d9187c01cd34dc14e2b29430"}, - {file = "lxml-4.8.0-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:b92d40121dcbd74831b690a75533da703750f7041b4bf951befc657c37e5695a"}, - {file = "lxml-4.8.0-cp27-cp27m-win32.whl", hash = "sha256:e01f9531ba5420838c801c21c1b0f45dbc9607cb22ea2cf132844453bec863a5"}, - {file = "lxml-4.8.0-cp27-cp27m-win_amd64.whl", hash = "sha256:6259b511b0f2527e6d55ad87acc1c07b3cbffc3d5e050d7e7bcfa151b8202df9"}, - {file = "lxml-4.8.0-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1010042bfcac2b2dc6098260a2ed022968dbdfaf285fc65a3acf8e4eb1ffd1bc"}, - {file = "lxml-4.8.0-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:fa56bb08b3dd8eac3a8c5b7d075c94e74f755fd9d8a04543ae8d37b1612dd170"}, - {file = "lxml-4.8.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:31ba2cbc64516dcdd6c24418daa7abff989ddf3ba6d3ea6f6ce6f2ed6e754ec9"}, - {file = "lxml-4.8.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:31499847fc5f73ee17dbe1b8e24c6dafc4e8d5b48803d17d22988976b0171f03"}, - {file = "lxml-4.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:5f7d7d9afc7b293147e2d506a4596641d60181a35279ef3aa5778d0d9d9123fe"}, - {file = "lxml-4.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:a3c5f1a719aa11866ffc530d54ad965063a8cbbecae6515acbd5f0fae8f48eaa"}, - {file = "lxml-4.8.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6268e27873a3d191849204d00d03f65c0e343b3bcb518a6eaae05677c95621d1"}, - {file = "lxml-4.8.0-cp310-cp310-win32.whl", hash = "sha256:330bff92c26d4aee79c5bc4d9967858bdbe73fdbdbacb5daf623a03a914fe05b"}, - {file = "lxml-4.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:b2582b238e1658c4061ebe1b4df53c435190d22457642377fd0cb30685cdfb76"}, - {file = "lxml-4.8.0-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a2bfc7e2a0601b475477c954bf167dee6d0f55cb167e3f3e7cefad906e7759f6"}, - {file = "lxml-4.8.0-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a1547ff4b8a833511eeaceacbcd17b043214fcdb385148f9c1bc5556ca9623e2"}, - {file = "lxml-4.8.0-cp35-cp35m-win32.whl", hash = "sha256:a9f1c3489736ff8e1c7652e9dc39f80cff820f23624f23d9eab6e122ac99b150"}, - {file = "lxml-4.8.0-cp35-cp35m-win_amd64.whl", hash = "sha256:530f278849031b0eb12f46cca0e5db01cfe5177ab13bd6878c6e739319bae654"}, - {file = "lxml-4.8.0-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:078306d19a33920004addeb5f4630781aaeabb6a8d01398045fcde085091a169"}, - {file = "lxml-4.8.0-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:86545e351e879d0b72b620db6a3b96346921fa87b3d366d6c074e5a9a0b8dadb"}, - {file = "lxml-4.8.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:24f5c5ae618395ed871b3d8ebfcbb36e3f1091fd847bf54c4de623f9107942f3"}, - {file = "lxml-4.8.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = 
"sha256:bbab6faf6568484707acc052f4dfc3802bdb0cafe079383fbaa23f1cdae9ecd4"}, - {file = "lxml-4.8.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7993232bd4044392c47779a3c7e8889fea6883be46281d45a81451acfd704d7e"}, - {file = "lxml-4.8.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6d6483b1229470e1d8835e52e0ff3c6973b9b97b24cd1c116dca90b57a2cc613"}, - {file = "lxml-4.8.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:ad4332a532e2d5acb231a2e5d33f943750091ee435daffca3fec0a53224e7e33"}, - {file = "lxml-4.8.0-cp36-cp36m-win32.whl", hash = "sha256:db3535733f59e5605a88a706824dfcb9bd06725e709ecb017e165fc1d6e7d429"}, - {file = "lxml-4.8.0-cp36-cp36m-win_amd64.whl", hash = "sha256:5f148b0c6133fb928503cfcdfdba395010f997aa44bcf6474fcdd0c5398d9b63"}, - {file = "lxml-4.8.0-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:8a31f24e2a0b6317f33aafbb2f0895c0bce772980ae60c2c640d82caac49628a"}, - {file = "lxml-4.8.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:719544565c2937c21a6f76d520e6e52b726d132815adb3447ccffbe9f44203c4"}, - {file = "lxml-4.8.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:c0b88ed1ae66777a798dc54f627e32d3b81c8009967c63993c450ee4cbcbec15"}, - {file = "lxml-4.8.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:fa9b7c450be85bfc6cd39f6df8c5b8cbd76b5d6fc1f69efec80203f9894b885f"}, - {file = "lxml-4.8.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e9f84ed9f4d50b74fbc77298ee5c870f67cb7e91dcdc1a6915cb1ff6a317476c"}, - {file = "lxml-4.8.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1d650812b52d98679ed6c6b3b55cbb8fe5a5460a0aef29aeb08dc0b44577df85"}, - {file = "lxml-4.8.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:80bbaddf2baab7e6de4bc47405e34948e694a9efe0861c61cdc23aa774fcb141"}, - {file = "lxml-4.8.0-cp37-cp37m-win32.whl", hash = "sha256:6f7b82934c08e28a2d537d870293236b1000d94d0b4583825ab9649aef7ddf63"}, - {file = "lxml-4.8.0-cp37-cp37m-win_amd64.whl", hash = "sha256:e1fd7d2fe11f1cb63d3336d147c852f6d07de0d0020d704c6031b46a30b02ca8"}, - {file = "lxml-4.8.0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:5045ee1ccd45a89c4daec1160217d363fcd23811e26734688007c26f28c9e9e7"}, - {file = "lxml-4.8.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:0c1978ff1fd81ed9dcbba4f91cf09faf1f8082c9d72eb122e92294716c605428"}, - {file = "lxml-4.8.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cbf2ff155b19dc4d4100f7442f6a697938bf4493f8d3b0c51d45568d5666b5"}, - {file = "lxml-4.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:ce13d6291a5f47c1c8dbd375baa78551053bc6b5e5c0e9bb8e39c0a8359fd52f"}, - {file = "lxml-4.8.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e11527dc23d5ef44d76fef11213215c34f36af1608074561fcc561d983aeb870"}, - {file = "lxml-4.8.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:60d2f60bd5a2a979df28ab309352cdcf8181bda0cca4529769a945f09aba06f9"}, - {file = "lxml-4.8.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:62f93eac69ec0f4be98d1b96f4d6b964855b8255c345c17ff12c20b93f247b68"}, - {file = "lxml-4.8.0-cp38-cp38-win32.whl", hash = "sha256:20b8a746a026017acf07da39fdb10aa80ad9877046c9182442bf80c84a1c4696"}, - {file = "lxml-4.8.0-cp38-cp38-win_amd64.whl", hash = 
"sha256:891dc8f522d7059ff0024cd3ae79fd224752676447f9c678f2a5c14b84d9a939"}, - {file = "lxml-4.8.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:b6fc2e2fb6f532cf48b5fed57567ef286addcef38c28874458a41b7837a57807"}, - {file = "lxml-4.8.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:74eb65ec61e3c7c019d7169387d1b6ffcfea1b9ec5894d116a9a903636e4a0b1"}, - {file = "lxml-4.8.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:627e79894770783c129cc5e89b947e52aa26e8e0557c7e205368a809da4b7939"}, - {file = "lxml-4.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:545bd39c9481f2e3f2727c78c169425efbfb3fbba6e7db4f46a80ebb249819ca"}, - {file = "lxml-4.8.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5a58d0b12f5053e270510bf12f753a76aaf3d74c453c00942ed7d2c804ca845c"}, - {file = "lxml-4.8.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:ec4b4e75fc68da9dc0ed73dcdb431c25c57775383fec325d23a770a64e7ebc87"}, - {file = "lxml-4.8.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5804e04feb4e61babf3911c2a974a5b86f66ee227cc5006230b00ac6d285b3a9"}, - {file = "lxml-4.8.0-cp39-cp39-win32.whl", hash = "sha256:aa0cf4922da7a3c905d000b35065df6184c0dc1d866dd3b86fd961905bbad2ea"}, - {file = "lxml-4.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:dd10383f1d6b7edf247d0960a3db274c07e96cf3a3fc7c41c8448f93eac3fb1c"}, - {file = "lxml-4.8.0-pp37-pypy37_pp73-macosx_10_14_x86_64.whl", hash = "sha256:2403a6d6fb61c285969b71f4a3527873fe93fd0abe0832d858a17fe68c8fa507"}, - {file = "lxml-4.8.0-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:986b7a96228c9b4942ec420eff37556c5777bfba6758edcb95421e4a614b57f9"}, - {file = "lxml-4.8.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:6fe4ef4402df0250b75ba876c3795510d782def5c1e63890bde02d622570d39e"}, - {file = "lxml-4.8.0-pp38-pypy38_pp73-macosx_10_14_x86_64.whl", hash = "sha256:f10ce66fcdeb3543df51d423ede7e238be98412232fca5daec3e54bcd16b8da0"}, - {file = "lxml-4.8.0-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:730766072fd5dcb219dd2b95c4c49752a54f00157f322bc6d71f7d2a31fecd79"}, - {file = "lxml-4.8.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:8b99ec73073b37f9ebe8caf399001848fced9c08064effdbfc4da2b5a8d07b93"}, - {file = "lxml-4.8.0.tar.gz", hash = "sha256:f63f62fc60e6228a4ca9abae28228f35e1bd3ce675013d1dfb828688d50c6e23"}, + {file = "lxml-4.9.1-cp27-cp27m-macosx_10_15_x86_64.whl", hash = "sha256:98cafc618614d72b02185ac583c6f7796202062c41d2eeecdf07820bad3295ed"}, + {file = "lxml-4.9.1-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c62e8dd9754b7debda0c5ba59d34509c4688f853588d75b53c3791983faa96fc"}, + {file = "lxml-4.9.1-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:21fb3d24ab430fc538a96e9fbb9b150029914805d551deeac7d7822f64631dfc"}, + {file = "lxml-4.9.1-cp27-cp27m-win32.whl", hash = "sha256:86e92728ef3fc842c50a5cb1d5ba2bc66db7da08a7af53fb3da79e202d1b2cd3"}, + {file = "lxml-4.9.1-cp27-cp27m-win_amd64.whl", hash = "sha256:4cfbe42c686f33944e12f45a27d25a492cc0e43e1dc1da5d6a87cbcaf2e95627"}, + {file = "lxml-4.9.1-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dad7b164905d3e534883281c050180afcf1e230c3d4a54e8038aa5cfcf312b84"}, + {file = 
"lxml-4.9.1-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a614e4afed58c14254e67862456d212c4dcceebab2eaa44d627c2ca04bf86837"}, + {file = "lxml-4.9.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:f9ced82717c7ec65a67667bb05865ffe38af0e835cdd78728f1209c8fffe0cad"}, + {file = "lxml-4.9.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:d9fc0bf3ff86c17348dfc5d322f627d78273eba545db865c3cd14b3f19e57fa5"}, + {file = "lxml-4.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:e5f66bdf0976ec667fc4594d2812a00b07ed14d1b44259d19a41ae3fff99f2b8"}, + {file = "lxml-4.9.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:fe17d10b97fdf58155f858606bddb4e037b805a60ae023c009f760d8361a4eb8"}, + {file = "lxml-4.9.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8caf4d16b31961e964c62194ea3e26a0e9561cdf72eecb1781458b67ec83423d"}, + {file = "lxml-4.9.1-cp310-cp310-win32.whl", hash = "sha256:4780677767dd52b99f0af1f123bc2c22873d30b474aa0e2fc3fe5e02217687c7"}, + {file = "lxml-4.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:b122a188cd292c4d2fcd78d04f863b789ef43aa129b233d7c9004de08693728b"}, + {file = "lxml-4.9.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:be9eb06489bc975c38706902cbc6888f39e946b81383abc2838d186f0e8b6a9d"}, + {file = "lxml-4.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:f1be258c4d3dc609e654a1dc59d37b17d7fef05df912c01fc2e15eb43a9735f3"}, + {file = "lxml-4.9.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:927a9dd016d6033bc12e0bf5dee1dde140235fc8d0d51099353c76081c03dc29"}, + {file = "lxml-4.9.1-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9232b09f5efee6a495a99ae6824881940d6447debe272ea400c02e3b68aad85d"}, + {file = "lxml-4.9.1-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:04da965dfebb5dac2619cb90fcf93efdb35b3c6994fea58a157a834f2f94b318"}, + {file = "lxml-4.9.1-cp35-cp35m-win32.whl", hash = "sha256:4d5bae0a37af799207140652a700f21a85946f107a199bcb06720b13a4f1f0b7"}, + {file = "lxml-4.9.1-cp35-cp35m-win_amd64.whl", hash = "sha256:4878e667ebabe9b65e785ac8da4d48886fe81193a84bbe49f12acff8f7a383a4"}, + {file = "lxml-4.9.1-cp36-cp36m-macosx_10_15_x86_64.whl", hash = "sha256:1355755b62c28950f9ce123c7a41460ed9743c699905cbe664a5bcc5c9c7c7fb"}, + {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:bcaa1c495ce623966d9fc8a187da80082334236a2a1c7e141763ffaf7a405067"}, + {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6eafc048ea3f1b3c136c71a86db393be36b5b3d9c87b1c25204e7d397cee9536"}, + {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:13c90064b224e10c14dcdf8086688d3f0e612db53766e7478d7754703295c7c8"}, + {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:206a51077773c6c5d2ce1991327cda719063a47adc02bd703c56a662cdb6c58b"}, + {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:e8f0c9d65da595cfe91713bc1222af9ecabd37971762cb830dea2fc3b3bb2acf"}, + {file = "lxml-4.9.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:8f0a4d179c9a941eb80c3a63cdb495e539e064f8054230844dcf2fcb812b71d3"}, + {file = "lxml-4.9.1-cp36-cp36m-musllinux_1_1_x86_64.whl", 
hash = "sha256:830c88747dce8a3e7525defa68afd742b4580df6aa2fdd6f0855481e3994d391"}, + {file = "lxml-4.9.1-cp36-cp36m-win32.whl", hash = "sha256:1e1cf47774373777936c5aabad489fef7b1c087dcd1f426b621fda9dcc12994e"}, + {file = "lxml-4.9.1-cp36-cp36m-win_amd64.whl", hash = "sha256:5974895115737a74a00b321e339b9c3f45c20275d226398ae79ac008d908bff7"}, + {file = "lxml-4.9.1-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:1423631e3d51008871299525b541413c9b6c6423593e89f9c4cfbe8460afc0a2"}, + {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:2aaf6a0a6465d39b5ca69688fce82d20088c1838534982996ec46633dc7ad6cc"}, + {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:9f36de4cd0c262dd9927886cc2305aa3f2210db437aa4fed3fb4940b8bf4592c"}, + {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:ae06c1e4bc60ee076292e582a7512f304abdf6c70db59b56745cca1684f875a4"}, + {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:57e4d637258703d14171b54203fd6822fda218c6c2658a7d30816b10995f29f3"}, + {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6d279033bf614953c3fc4a0aa9ac33a21e8044ca72d4fa8b9273fe75359d5cca"}, + {file = "lxml-4.9.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:a60f90bba4c37962cbf210f0188ecca87daafdf60271f4c6948606e4dabf8785"}, + {file = "lxml-4.9.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6ca2264f341dd81e41f3fffecec6e446aa2121e0b8d026fb5130e02de1402785"}, + {file = "lxml-4.9.1-cp37-cp37m-win32.whl", hash = "sha256:27e590352c76156f50f538dbcebd1925317a0f70540f7dc8c97d2931c595783a"}, + {file = "lxml-4.9.1-cp37-cp37m-win_amd64.whl", hash = "sha256:eea5d6443b093e1545ad0210e6cf27f920482bfcf5c77cdc8596aec73523bb7e"}, + {file = "lxml-4.9.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:f05251bbc2145349b8d0b77c0d4e5f3b228418807b1ee27cefb11f69ed3d233b"}, + {file = "lxml-4.9.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:487c8e61d7acc50b8be82bda8c8d21d20e133c3cbf41bd8ad7eb1aaeb3f07c97"}, + {file = "lxml-4.9.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:8d1a92d8e90b286d491e5626af53afef2ba04da33e82e30744795c71880eaa21"}, + {file = "lxml-4.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:b570da8cd0012f4af9fa76a5635cd31f707473e65a5a335b186069d5c7121ff2"}, + {file = "lxml-4.9.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5ef87fca280fb15342726bd5f980f6faf8b84a5287fcc2d4962ea8af88b35130"}, + {file = "lxml-4.9.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:93e414e3206779ef41e5ff2448067213febf260ba747fc65389a3ddaa3fb8715"}, + {file = "lxml-4.9.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6653071f4f9bac46fbc30f3c7838b0e9063ee335908c5d61fb7a4a86c8fd2036"}, + {file = "lxml-4.9.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:32a73c53783becdb7eaf75a2a1525ea8e49379fb7248c3eeefb9412123536387"}, + {file = "lxml-4.9.1-cp38-cp38-win32.whl", hash = "sha256:1a7c59c6ffd6ef5db362b798f350e24ab2cfa5700d53ac6681918f314a4d3b94"}, + {file = "lxml-4.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:1436cf0063bba7888e43f1ba8d58824f085410ea2025befe81150aceb123e345"}, + {file = "lxml-4.9.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = 
"sha256:4beea0f31491bc086991b97517b9683e5cfb369205dac0148ef685ac12a20a67"}, + {file = "lxml-4.9.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:41fb58868b816c202e8881fd0f179a4644ce6e7cbbb248ef0283a34b73ec73bb"}, + {file = "lxml-4.9.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:bd34f6d1810d9354dc7e35158aa6cc33456be7706df4420819af6ed966e85448"}, + {file = "lxml-4.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:edffbe3c510d8f4bf8640e02ca019e48a9b72357318383ca60e3330c23aaffc7"}, + {file = "lxml-4.9.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6d949f53ad4fc7cf02c44d6678e7ff05ec5f5552b235b9e136bd52e9bf730b91"}, + {file = "lxml-4.9.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:079b68f197c796e42aa80b1f739f058dcee796dc725cc9a1be0cdb08fc45b000"}, + {file = "lxml-4.9.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9c3a88d20e4fe4a2a4a84bf439a5ac9c9aba400b85244c63a1ab7088f85d9d25"}, + {file = "lxml-4.9.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4e285b5f2bf321fc0857b491b5028c5f276ec0c873b985d58d7748ece1d770dd"}, + {file = "lxml-4.9.1-cp39-cp39-win32.whl", hash = "sha256:ef72013e20dd5ba86a8ae1aed7f56f31d3374189aa8b433e7b12ad182c0d2dfb"}, + {file = "lxml-4.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:10d2017f9150248563bb579cd0d07c61c58da85c922b780060dcc9a3aa9f432d"}, + {file = "lxml-4.9.1-pp37-pypy37_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0538747a9d7827ce3e16a8fdd201a99e661c7dee3c96c885d8ecba3c35d1032c"}, + {file = "lxml-4.9.1-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:0645e934e940107e2fdbe7c5b6fb8ec6232444260752598bc4d09511bd056c0b"}, + {file = "lxml-4.9.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:6daa662aba22ef3258934105be2dd9afa5bb45748f4f702a3b39a5bf53a1f4dc"}, + {file = "lxml-4.9.1-pp38-pypy38_pp73-macosx_10_15_x86_64.whl", hash = "sha256:603a464c2e67d8a546ddaa206d98e3246e5db05594b97db844c2f0a1af37cf5b"}, + {file = "lxml-4.9.1-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:c4b2e0559b68455c085fb0f6178e9752c4be3bba104d6e881eb5573b399d1eb2"}, + {file = "lxml-4.9.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0f3f0059891d3254c7b5fb935330d6db38d6519ecd238ca4fce93c234b4a0f73"}, + {file = "lxml-4.9.1-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:c852b1530083a620cb0de5f3cd6826f19862bafeaf77586f1aef326e49d95f0c"}, + {file = "lxml-4.9.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:287605bede6bd36e930577c5925fcea17cb30453d96a7b4c63c14a257118dbb9"}, + {file = "lxml-4.9.1.tar.gz", hash = "sha256:fe749b052bb7233fe5d072fcb549221a8cb1a16725c47c37e42b0b9cb3ff2c3f"}, ] markupsafe = [ {file = "MarkupSafe-2.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3028252424c72b2602a323f70fbf50aa80a5d3aa616ea6add4ba21ae9cc9da4c"}, From 2b5ab8e3674b7d6003a5f17252c7933c2d6a381a Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Thu, 7 Jul 2022 12:02:09 +0100 Subject: [PATCH 083/178] Use a single query in `ProfileHandler.get_profile` (#13209) --- changelog.d/13209.misc | 1 + synapse/handlers/profile.py | 19 +++++++------------ 2 files changed, 8 insertions(+), 12 
deletions(-) create mode 100644 changelog.d/13209.misc diff --git a/changelog.d/13209.misc b/changelog.d/13209.misc new file mode 100644 index 0000000000..cb0b8b4e63 --- /dev/null +++ b/changelog.d/13209.misc @@ -0,0 +1 @@ +Reduce number of queries used to get profile information. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 6eed3826a7..d8ff5289b5 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -67,19 +67,14 @@ class ProfileHandler: target_user = UserID.from_string(user_id) if self.hs.is_mine(target_user): - try: - displayname = await self.store.get_profile_displayname( - target_user.localpart - ) - avatar_url = await self.store.get_profile_avatar_url( - target_user.localpart - ) - except StoreError as e: - if e.code == 404: - raise SynapseError(404, "Profile was not found", Codes.NOT_FOUND) - raise + profileinfo = await self.store.get_profileinfo(target_user.localpart) + if profileinfo.display_name is None: + raise SynapseError(404, "Profile was not found", Codes.NOT_FOUND) - return {"displayname": displayname, "avatar_url": avatar_url} + return { + "displayname": profileinfo.display_name, + "avatar_url": profileinfo.avatar_url, + } else: try: result = await self.federation.make_query( From 1391a76cd2b287daebe61f7d8ea03b258ed522f5 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Thu, 7 Jul 2022 13:19:31 +0100 Subject: [PATCH 084/178] Faster room joins: fix race in recalculation of current room state (#13151) Bounce recalculation of current state to the correct event persister and move recalculation of current state into the event persistence queue, to avoid concurrent updates to a room's current state. Also give recalculation of a room's current state a real stream ordering. Signed-off-by: Sean Quah --- changelog.d/13151.misc | 1 + synapse/handlers/federation.py | 9 +- synapse/replication/http/__init__.py | 2 + synapse/replication/http/state.py | 75 ++++++++++ synapse/state/__init__.py | 25 ++++ synapse/storage/controllers/persist_events.py | 141 +++++++++++++----- synapse/storage/databases/main/events.py | 14 +- tests/test_state.py | 2 + 8 files changed, 214 insertions(+), 55 deletions(-) create mode 100644 changelog.d/13151.misc create mode 100644 synapse/replication/http/state.py diff --git a/changelog.d/13151.misc b/changelog.d/13151.misc new file mode 100644 index 0000000000..cfe3eed3a1 --- /dev/null +++ b/changelog.d/13151.misc @@ -0,0 +1 @@ +Faster room joins: fix race in recalculation of current room state. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 3c44b4bf86..e2564e9340 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1559,14 +1559,9 @@ class FederationHandler: # all the events are updated, so we can update current state and # clear the lazy-loading flag. 
logger.info("Updating current state for %s", room_id) - # TODO(faster_joins): support workers + # TODO(faster_joins): notify workers in notify_room_un_partial_stated # https://github.com/matrix-org/synapse/issues/12994 - assert ( - self._storage_controllers.persistence is not None - ), "worker-mode deployments not currently supported here" - await self._storage_controllers.persistence.update_current_state( - room_id - ) + await self.state_handler.update_current_state(room_id) logger.info("Clearing partial-state flag for %s", room_id) success = await self.store.clear_partial_state_room(room_id) diff --git a/synapse/replication/http/__init__.py b/synapse/replication/http/__init__.py index aec040ee19..53aa7fa4c6 100644 --- a/synapse/replication/http/__init__.py +++ b/synapse/replication/http/__init__.py @@ -25,6 +25,7 @@ from synapse.replication.http import ( push, register, send_event, + state, streams, ) @@ -48,6 +49,7 @@ class ReplicationRestResource(JsonResource): streams.register_servlets(hs, self) account_data.register_servlets(hs, self) push.register_servlets(hs, self) + state.register_servlets(hs, self) # The following can't currently be instantiated on workers. if hs.config.worker.worker_app is None: diff --git a/synapse/replication/http/state.py b/synapse/replication/http/state.py new file mode 100644 index 0000000000..838b7584e5 --- /dev/null +++ b/synapse/replication/http/state.py @@ -0,0 +1,75 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from typing import TYPE_CHECKING, Tuple + +from twisted.web.server import Request + +from synapse.api.errors import SynapseError +from synapse.http.server import HttpServer +from synapse.replication.http._base import ReplicationEndpoint +from synapse.types import JsonDict + +if TYPE_CHECKING: + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + + +class ReplicationUpdateCurrentStateRestServlet(ReplicationEndpoint): + """Recalculates the current state for a room, and persists it. 
+ + The API looks like: + + POST /_synapse/replication/update_current_state/:room_id + + {} + + 200 OK + + {} + """ + + NAME = "update_current_state" + PATH_ARGS = ("room_id",) + + def __init__(self, hs: "HomeServer"): + super().__init__(hs) + + self._state_handler = hs.get_state_handler() + self._events_shard_config = hs.config.worker.events_shard_config + self._instance_name = hs.get_instance_name() + + @staticmethod + async def _serialize_payload(room_id: str) -> JsonDict: # type: ignore[override] + return {} + + async def _handle_request( # type: ignore[override] + self, request: Request, room_id: str + ) -> Tuple[int, JsonDict]: + writer_instance = self._events_shard_config.get_instance(room_id) + if writer_instance != self._instance_name: + raise SynapseError( + 400, "/update_current_state request was routed to the wrong worker" + ) + + await self._state_handler.update_current_state(room_id) + + return 200, {} + + +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: + if hs.get_instance_name() in hs.config.worker.writers.events: + ReplicationUpdateCurrentStateRestServlet(hs).register(http_server) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index d5cbdb3eef..781d9f06da 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -43,6 +43,7 @@ from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, StateResolutionVersio from synapse.events import EventBase from synapse.events.snapshot import EventContext from synapse.logging.context import ContextResourceUsage +from synapse.replication.http.state import ReplicationUpdateCurrentStateRestServlet from synapse.state import v1, v2 from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.roommember import ProfileInfo @@ -129,6 +130,12 @@ class StateHandler: self.hs = hs self._state_resolution_handler = hs.get_state_resolution_handler() self._storage_controllers = hs.get_storage_controllers() + self._events_shard_config = hs.config.worker.events_shard_config + self._instance_name = hs.get_instance_name() + + self._update_current_state_client = ( + ReplicationUpdateCurrentStateRestServlet.make_client(hs) + ) async def get_current_state_ids( self, @@ -423,6 +430,24 @@ class StateHandler: return {key: state_map[ev_id] for key, ev_id in new_state.items()} + async def update_current_state(self, room_id: str) -> None: + """Recalculates the current state for a room, and persists it. 
+ + Raises: + SynapseError(502): if all attempts to connect to the event persister worker + fail + """ + writer_instance = self._events_shard_config.get_instance(room_id) + if writer_instance != self._instance_name: + await self._update_current_state_client( + instance_name=writer_instance, + room_id=room_id, + ) + return + + assert self._storage_controllers.persistence is not None + await self._storage_controllers.persistence.update_current_state(room_id) + @attr.s(slots=True, auto_attribs=True) class _StateResMetrics: diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index c248fccc81..ea499ce0f8 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -22,6 +22,7 @@ from typing import ( Any, Awaitable, Callable, + ClassVar, Collection, Deque, Dict, @@ -33,6 +34,7 @@ from typing import ( Set, Tuple, TypeVar, + Union, ) import attr @@ -111,9 +113,43 @@ times_pruned_extremities = Counter( @attr.s(auto_attribs=True, slots=True) -class _EventPersistQueueItem: +class _PersistEventsTask: + """A batch of events to persist.""" + + name: ClassVar[str] = "persist_event_batch" # used for opentracing + events_and_contexts: List[Tuple[EventBase, EventContext]] backfilled: bool + + def try_merge(self, task: "_EventPersistQueueTask") -> bool: + """Batches events with the same backfilled option together.""" + if ( + not isinstance(task, _PersistEventsTask) + or self.backfilled != task.backfilled + ): + return False + + self.events_and_contexts.extend(task.events_and_contexts) + return True + + +@attr.s(auto_attribs=True, slots=True) +class _UpdateCurrentStateTask: + """A room whose current state needs recalculating.""" + + name: ClassVar[str] = "update_current_state" # used for opentracing + + def try_merge(self, task: "_EventPersistQueueTask") -> bool: + """Deduplicates consecutive recalculations of current state.""" + return isinstance(task, _UpdateCurrentStateTask) + + +_EventPersistQueueTask = Union[_PersistEventsTask, _UpdateCurrentStateTask] + + +@attr.s(auto_attribs=True, slots=True) +class _EventPersistQueueItem: + task: _EventPersistQueueTask deferred: ObservableDeferred parent_opentracing_span_contexts: List = attr.ib(factory=list) @@ -127,14 +163,16 @@ _PersistResult = TypeVar("_PersistResult") class _EventPeristenceQueue(Generic[_PersistResult]): - """Queues up events so that they can be persisted in bulk with only one - concurrent transaction per room. + """Queues up tasks so that they can be processed with only one concurrent + transaction per room. + + Tasks can be bulk persistence of events or recalculation of a room's current state. """ def __init__( self, per_item_callback: Callable[ - [List[Tuple[EventBase, EventContext]], bool], + [str, _EventPersistQueueTask], Awaitable[_PersistResult], ], ): @@ -150,18 +188,17 @@ class _EventPeristenceQueue(Generic[_PersistResult]): async def add_to_queue( self, room_id: str, - events_and_contexts: Iterable[Tuple[EventBase, EventContext]], - backfilled: bool, + task: _EventPersistQueueTask, ) -> _PersistResult: - """Add events to the queue, with the given persist_event options. + """Add a task to the queue. - If we are not already processing events in this room, starts off a background + If we are not already processing tasks in this room, starts off a background process to to so, calling the per_item_callback for each item. 
Args: room_id (str): - events_and_contexts (list[(EventBase, EventContext)]): - backfilled (bool): + task (_EventPersistQueueTask): A _PersistEventsTask or + _UpdateCurrentStateTask to process. Returns: the result returned by the `_per_item_callback` passed to @@ -169,26 +206,20 @@ class _EventPeristenceQueue(Generic[_PersistResult]): """ queue = self._event_persist_queues.setdefault(room_id, deque()) - # if the last item in the queue has the same `backfilled` setting, - # we can just add these new events to that item. - if queue and queue[-1].backfilled == backfilled: + if queue and queue[-1].task.try_merge(task): + # the new task has been merged into the last task in the queue end_item = queue[-1] else: - # need to make a new queue item deferred: ObservableDeferred[_PersistResult] = ObservableDeferred( defer.Deferred(), consumeErrors=True ) end_item = _EventPersistQueueItem( - events_and_contexts=[], - backfilled=backfilled, + task=task, deferred=deferred, ) queue.append(end_item) - # add our events to the queue item - end_item.events_and_contexts.extend(events_and_contexts) - # also add our active opentracing span to the item so that we get a link back span = opentracing.active_span() if span: @@ -202,7 +233,7 @@ class _EventPeristenceQueue(Generic[_PersistResult]): # add another opentracing span which links to the persist trace. with opentracing.start_active_span_follows_from( - "persist_event_batch_complete", (end_item.opentracing_span_context,) + f"{task.name}_complete", (end_item.opentracing_span_context,) ): pass @@ -234,16 +265,14 @@ class _EventPeristenceQueue(Generic[_PersistResult]): for item in queue: try: with opentracing.start_active_span_follows_from( - "persist_event_batch", + item.task.name, item.parent_opentracing_span_contexts, inherit_force_tracing=True, ) as scope: if scope: item.opentracing_span_context = scope.span.context - ret = await self._per_item_callback( - item.events_and_contexts, item.backfilled - ) + ret = await self._per_item_callback(room_id, item.task) except Exception: with PreserveLoggingContext(): item.deferred.errback() @@ -292,9 +321,32 @@ class EventsPersistenceStorageController: self._clock = hs.get_clock() self._instance_name = hs.get_instance_name() self.is_mine_id = hs.is_mine_id - self._event_persist_queue = _EventPeristenceQueue(self._persist_event_batch) + self._event_persist_queue = _EventPeristenceQueue( + self._process_event_persist_queue_task + ) self._state_resolution_handler = hs.get_state_resolution_handler() + async def _process_event_persist_queue_task( + self, + room_id: str, + task: _EventPersistQueueTask, + ) -> Dict[str, str]: + """Callback for the _event_persist_queue + + Returns: + A dictionary of event ID to event ID we didn't persist as we already + had another event persisted with the same TXN ID. 
+ """ + if isinstance(task, _PersistEventsTask): + return await self._persist_event_batch(room_id, task) + elif isinstance(task, _UpdateCurrentStateTask): + await self._update_current_state(room_id, task) + return {} + else: + raise AssertionError( + f"Found an unexpected task type in event persistence queue: {task}" + ) + @opentracing.trace async def persist_events( self, @@ -329,7 +381,8 @@ class EventsPersistenceStorageController: ) -> Dict[str, str]: room_id, evs_ctxs = item return await self._event_persist_queue.add_to_queue( - room_id, evs_ctxs, backfilled=backfilled + room_id, + _PersistEventsTask(events_and_contexts=evs_ctxs, backfilled=backfilled), ) ret_vals = await yieldable_gather_results(enqueue, partitioned.items()) @@ -376,7 +429,10 @@ class EventsPersistenceStorageController: # event was deduplicated. (The dict may also include other entries if # the event was persisted in a batch with other events.) replaced_events = await self._event_persist_queue.add_to_queue( - event.room_id, [(event, context)], backfilled=backfilled + event.room_id, + _PersistEventsTask( + events_and_contexts=[(event, context)], backfilled=backfilled + ), ) replaced_event = replaced_events.get(event.event_id) if replaced_event: @@ -391,20 +447,22 @@ class EventsPersistenceStorageController: async def update_current_state(self, room_id: str) -> None: """Recalculate the current state for a room, and persist it""" + await self._event_persist_queue.add_to_queue( + room_id, + _UpdateCurrentStateTask(), + ) + + async def _update_current_state( + self, room_id: str, _task: _UpdateCurrentStateTask + ) -> None: + """Callback for the _event_persist_queue + + Recalculates the current state for a room, and persists it. + """ state = await self._calculate_current_state(room_id) delta = await self._calculate_state_delta(room_id, state) - # TODO(faster_joins): get a real stream ordering, to make this work correctly - # across workers. - # https://github.com/matrix-org/synapse/issues/12994 - # - # TODO(faster_joins): this can race against event persistence, in which case we - # will end up with incorrect state. Perhaps we should make this a job we - # farm out to the event persister thread, somehow. - # https://github.com/matrix-org/synapse/issues/13007 - # - stream_id = self.main_store.get_room_max_stream_ordering() - await self.persist_events_store.update_current_state(room_id, delta, stream_id) + await self.persist_events_store.update_current_state(room_id, delta) async def _calculate_current_state(self, room_id: str) -> StateMap[str]: """Calculate the current state of a room, based on the forward extremities @@ -449,9 +507,7 @@ class EventsPersistenceStorageController: return res.state async def _persist_event_batch( - self, - events_and_contexts: List[Tuple[EventBase, EventContext]], - backfilled: bool = False, + self, _room_id: str, task: _PersistEventsTask ) -> Dict[str, str]: """Callback for the _event_persist_queue @@ -466,6 +522,9 @@ class EventsPersistenceStorageController: PartialStateConflictError: if attempting to persist a partial state event in a room that has been un-partial stated. 
""" + events_and_contexts = task.events_and_contexts + backfilled = task.backfilled + replaced_events: Dict[str, str] = {} if not events_and_contexts: return replaced_events diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 8a0e4e9589..2ff3d21305 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1007,16 +1007,16 @@ class PersistEventsStore: self, room_id: str, state_delta: DeltaState, - stream_id: int, ) -> None: """Update the current state stored in the datatabase for the given room""" - await self.db_pool.runInteraction( - "update_current_state", - self._update_current_state_txn, - state_delta_by_room={room_id: state_delta}, - stream_id=stream_id, - ) + async with self._stream_id_gen.get_next() as stream_ordering: + await self.db_pool.runInteraction( + "update_current_state", + self._update_current_state_txn, + state_delta_by_room={room_id: state_delta}, + stream_id=stream_ordering, + ) def _update_current_state_txn( self, diff --git a/tests/test_state.py b/tests/test_state.py index 7b3f52f68e..6ca8d8f21d 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -195,6 +195,8 @@ class StateTestCase(unittest.TestCase): "get_state_resolution_handler", "get_account_validity_handler", "get_macaroon_generator", + "get_instance_name", + "get_simple_http_client", "hostname", ] ) From bb20113c8f04d574dd40becf57bf291e350ea8f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Thu, 7 Jul 2022 14:47:26 +0200 Subject: [PATCH 085/178] Remove obsolete RoomEventsStoreTestCase (#13200) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All tests are prefixed with `STALE_` and therefore they are silently skipped. They were moved to `STALE_` in version `v0.5.0` in commit 2fcce3b3c508 - `Remove stale tests`. Tests from `RoomEventsStoreTestCase` class are not used for last 8 years, I believe the best would be to remove them entirely. Signed-off-by: Petr Vaněk --- changelog.d/13200.removal | 1 + tests/storage/test_room.py | 69 -------------------------------------- 2 files changed, 1 insertion(+), 69 deletions(-) create mode 100644 changelog.d/13200.removal diff --git a/changelog.d/13200.removal b/changelog.d/13200.removal new file mode 100644 index 0000000000..755f5eb192 --- /dev/null +++ b/changelog.d/13200.removal @@ -0,0 +1 @@ +Remove obsolete and for 8 years unused `RoomEventsStoreTestCase`. Contributed by @arkamar. diff --git a/tests/storage/test_room.py b/tests/storage/test_room.py index 3c79dabc9f..3405efb6a8 100644 --- a/tests/storage/test_room.py +++ b/tests/storage/test_room.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from synapse.api.constants import EventTypes from synapse.api.room_versions import RoomVersions from synapse.types import RoomAlias, RoomID, UserID @@ -65,71 +64,3 @@ class RoomStoreTestCase(HomeserverTestCase): self.assertIsNone( (self.get_success(self.store.get_room_with_stats("!uknown:test"))), ) - - -class RoomEventsStoreTestCase(HomeserverTestCase): - def prepare(self, reactor, clock, hs): - # Room events need the full datastore, for persist_event() and - # get_room_state() - self.store = hs.get_datastores().main - self._storage_controllers = hs.get_storage_controllers() - self.event_factory = hs.get_event_factory() - - self.room = RoomID.from_string("!abcde:test") - - self.get_success( - self.store.store_room( - self.room.to_string(), - room_creator_user_id="@creator:text", - is_public=True, - room_version=RoomVersions.V1, - ) - ) - - def inject_room_event(self, **kwargs): - self.get_success( - self._storage_controllers.persistence.persist_event( - self.event_factory.create_event(room_id=self.room.to_string(), **kwargs) - ) - ) - - def STALE_test_room_name(self): - name = "A-Room-Name" - - self.inject_room_event( - etype=EventTypes.Name, name=name, content={"name": name}, depth=1 - ) - - state = self.get_success( - self._storage_controllers.state.get_current_state( - room_id=self.room.to_string() - ) - ) - - self.assertEqual(1, len(state)) - self.assertObjectHasAttributes( - {"type": "m.room.name", "room_id": self.room.to_string(), "name": name}, - state[0], - ) - - def STALE_test_room_topic(self): - topic = "A place for things" - - self.inject_room_event( - etype=EventTypes.Topic, topic=topic, content={"topic": topic}, depth=1 - ) - - state = self.get_success( - self._storage_controllers.state.get_current_state( - room_id=self.room.to_string() - ) - ) - - self.assertEqual(1, len(state)) - self.assertObjectHasAttributes( - {"type": "m.room.topic", "room_id": self.room.to_string(), "topic": topic}, - state[0], - ) - - # Not testing the various 'level' methods for now because there's lots - # of them and need coalescing; see JIRA SPEC-11 From 0c95313a448ab38629a13443ea9b3e0e5cc65d39 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Thu, 7 Jul 2022 15:18:38 +0100 Subject: [PATCH 086/178] Add --build-only option to complement.sh to prevent actually running Complement. (#13158) --- changelog.d/13158.misc | 1 + scripts-dev/complement.sh | 21 ++++++++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 changelog.d/13158.misc diff --git a/changelog.d/13158.misc b/changelog.d/13158.misc new file mode 100644 index 0000000000..1cb77c02d7 --- /dev/null +++ b/changelog.d/13158.misc @@ -0,0 +1 @@ +Add support to `complement.sh` for skipping the docker build. diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 705243ca9b..6381f7092e 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -44,8 +44,14 @@ usage() { Usage: $0 [-f] ... Run the complement test suite on Synapse. - -f Skip rebuilding the docker images, and just use the most recent - 'complement-synapse:latest' image + -f, --fast + Skip rebuilding the docker images, and just use the most recent + 'complement-synapse:latest' image. + Conflicts with --build-only. + + --build-only + Only build the Docker images. Don't actually run Complement. + Conflicts with -f/--fast. For help on arguments to 'go test', run 'go help testflag'. 
EOF @@ -53,6 +59,7 @@ EOF # parse our arguments skip_docker_build="" +skip_complement_run="" while [ $# -ge 1 ]; do arg=$1 case "$arg" in @@ -60,9 +67,12 @@ while [ $# -ge 1 ]; do usage exit 1 ;; - "-f") + "-f"|"--fast") skip_docker_build=1 ;; + "--build-only") + skip_complement_run=1 + ;; *) # unknown arg: presumably an argument to gotest. break the loop. break @@ -106,6 +116,11 @@ if [ -z "$skip_docker_build" ]; then echo_if_github "::endgroup::" fi +if [ -n "$skip_complement_run" ]; then + echo "Skipping Complement run as requested." + exit +fi + export COMPLEMENT_BASE_IMAGE=complement-synapse extra_test_args=() From a962c5a56de69c03848646f25991fabe6e4c39d1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 7 Jul 2022 11:52:45 -0500 Subject: [PATCH 087/178] Fix exception when using MSC3030 to look for remote federated events before room creation (#13197) Complement tests: https://github.com/matrix-org/complement/pull/405 This happens when you have some messages imported before the room is created. Then use MSC3030 to look backwards before the room creation from a remote federated server. The server won't find anything locally, but will ask over federation which will have the remote event. The previous logic would choke on not having the local event assigned. ``` Failed to fetch /timestamp_to_event from hs2 because of exception(UnboundLocalError) local variable 'local_event' referenced before assignment args=("local variable 'local_event' referenced before assignment",) ``` --- changelog.d/13197.bugfix | 1 + synapse/handlers/room.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13197.bugfix diff --git a/changelog.d/13197.bugfix b/changelog.d/13197.bugfix new file mode 100644 index 0000000000..8417241523 --- /dev/null +++ b/changelog.d/13197.bugfix @@ -0,0 +1 @@ +Fix exception when using experimental [MSC3030](https://github.com/matrix-org/matrix-spec-proposals/pull/3030) `/timestamp_to_event` endpoint to look for remote federated imported events before room creation. diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 75c0be8c36..44f8084579 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1375,6 +1375,7 @@ class TimestampLookupHandler: # the timestamp given and the event we were able to find locally is_event_next_to_backward_gap = False is_event_next_to_forward_gap = False + local_event = None if local_event_id: local_event = await self.store.get_event( local_event_id, allow_none=False, allow_rejected=False @@ -1461,7 +1462,10 @@ class TimestampLookupHandler: ex.args, ) - if not local_event_id: + # To appease mypy, we have to add both of these conditions to check for + # `None`. We only expect `local_event` to be `None` when + # `local_event_id` is `None` but mypy isn't as smart and assuming as us. 
+ if not local_event_id or not local_event: raise SynapseError( 404, "Unable to find event from %s in direction %s" % (timestamp, direction), From 757bc0caefa596e747278b3bcf4269ec50ffc759 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 8 Jul 2022 14:00:29 +0100 Subject: [PATCH 088/178] Fix notification count after a highlighted message (#13223) Fixes #13196 Broke by #13005 --- changelog.d/13223.bugfix | 1 + synapse/storage/databases/main/event_push_actions.py | 11 ++++++++--- tests/storage/test_event_push_actions.py | 7 +++++++ 3 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 changelog.d/13223.bugfix diff --git a/changelog.d/13223.bugfix b/changelog.d/13223.bugfix new file mode 100644 index 0000000000..6ee3aed910 --- /dev/null +++ b/changelog.d/13223.bugfix @@ -0,0 +1 @@ +Fix bug where notification counts would get stuck after a highlighted message. Broke in v1.62.0. diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index a3edcbb398..1a951ac02a 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -1016,9 +1016,14 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas upd.stream_ordering FROM ( SELECT user_id, room_id, count(*) as cnt, - max(stream_ordering) as stream_ordering - FROM event_push_actions - WHERE ? < stream_ordering AND stream_ordering <= ? + max(ea.stream_ordering) as stream_ordering + FROM event_push_actions AS ea + LEFT JOIN event_push_summary AS old USING (user_id, room_id) + WHERE ? < ea.stream_ordering AND ea.stream_ordering <= ? + AND ( + old.last_receipt_stream_ordering IS NULL + OR old.last_receipt_stream_ordering < ea.stream_ordering + ) AND %s = 1 GROUP BY user_id, room_id ) AS upd diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index e68126777f..e8c53f16d9 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -196,6 +196,13 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): _mark_read(10, 10) _assert_counts(0, 0) + _inject_actions(11, HIGHLIGHT) + _assert_counts(1, 1) + _mark_read(11, 11) + _assert_counts(0, 0) + _rotate(11) + _assert_counts(0, 0) + def test_find_first_stream_ordering_after_ts(self) -> None: def add_event(so: int, ts: int) -> None: self.get_success( From 739adf15511b2ce983cb5d4d6a948ff543f3b0a8 Mon Sep 17 00:00:00 2001 From: Sumner Evans Date: Fri, 8 Jul 2022 10:40:25 -0600 Subject: [PATCH 089/178] editorconfig: add max_line_length for Python files (#13228) See the documentation for the property here: https://github.com/editorconfig/editorconfig/wiki/EditorConfig-Properties#max_line_length Signed-off-by: Sumner Evans --- .editorconfig | 1 + changelog.d/13228.misc | 1 + 2 files changed, 2 insertions(+) create mode 100644 changelog.d/13228.misc diff --git a/.editorconfig b/.editorconfig index 3edf9e717c..d629bede5e 100644 --- a/.editorconfig +++ b/.editorconfig @@ -7,3 +7,4 @@ root = true [*.py] indent_style = space indent_size = 4 +max_line_length = 88 diff --git a/changelog.d/13228.misc b/changelog.d/13228.misc new file mode 100644 index 0000000000..fec086557e --- /dev/null +++ b/changelog.d/13228.misc @@ -0,0 +1 @@ +Add `max_line_length` setting for Python files to the `.editorconfig`. Contributed by @sumnerevans @ Beeper. 
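The 88-column limit presumably mirrors the default line length of the `black` formatter used by Synapse's lint script, so editors that honour `.editorconfig` should wrap Python code where the formatter expects it. For reference, after this change the Python section of `.editorconfig` reads:

```
[*.py]
indent_style = space
indent_size = 4
max_line_length = 88
```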
From 28d96cb2b49c12b741d03e4b74f30f8910f9942b Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 11 Jul 2022 10:36:18 +0100 Subject: [PATCH 090/178] Ensure portdb selects _all_ rows with negative rowids (#13226) --- changelog.d/13226.bugfix | 1 + synapse/_scripts/synapse_port_db.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13226.bugfix diff --git a/changelog.d/13226.bugfix b/changelog.d/13226.bugfix new file mode 100644 index 0000000000..df96d41f37 --- /dev/null +++ b/changelog.d/13226.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where the `synapse_port_db` script could fail to copy rows with negative row ids. diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 642fd41629..26834a437e 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -418,12 +418,15 @@ class Porter: self.progress.update(table, table_size) # Mark table as done return + # We sweep over rowids in two directions: one forwards (rowids 1, 2, 3, ...) + # and another backwards (rowids 0, -1, -2, ...). forward_select = ( "SELECT rowid, * FROM %s WHERE rowid >= ? ORDER BY rowid LIMIT ?" % (table,) ) backward_select = ( - "SELECT rowid, * FROM %s WHERE rowid <= ? ORDER BY rowid LIMIT ?" % (table,) + "SELECT rowid, * FROM %s WHERE rowid <= ? ORDER BY rowid DESC LIMIT ?" + % (table,) ) do_forward = [True] From a11301179494f5a2924dcd60069c06f5c192020f Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Mon, 11 Jul 2022 07:12:28 -0600 Subject: [PATCH 091/178] Fix appservice EDUs failing to send if the EDU doesn't have a room ID (#13236) * Fix appservice EDUs failing to send if the EDU doesn't have a room ID As is in the case of presence. * changelog * linter * fix linter again --- changelog.d/13236.bugfix | 1 + synapse/appservice/scheduler.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13236.bugfix diff --git a/changelog.d/13236.bugfix b/changelog.d/13236.bugfix new file mode 100644 index 0000000000..7fddc4413d --- /dev/null +++ b/changelog.d/13236.bugfix @@ -0,0 +1 @@ +Fix appservices not receiving room-less EDUs, like presence, if enabled. \ No newline at end of file diff --git a/synapse/appservice/scheduler.py b/synapse/appservice/scheduler.py index de5e5216c2..6c8695346f 100644 --- a/synapse/appservice/scheduler.py +++ b/synapse/appservice/scheduler.py @@ -319,7 +319,9 @@ class _ServiceQueuer: rooms_of_interesting_users.update(event.room_id for event in events) # EDUs rooms_of_interesting_users.update( - ephemeral["room_id"] for ephemeral in ephemerals + ephemeral["room_id"] + for ephemeral in ephemerals + if ephemeral.get("room_id") is not None ) # Look up the AS users in those rooms From e610128c507149e46d459bf97ba0fb6a8bd34b34 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 11 Jul 2022 14:14:09 +0100 Subject: [PATCH 092/178] Add a `filter_event_for_clients_with_state` function (#13222) --- changelog.d/13222.misc | 1 + synapse/app/admin_cmd.py | 13 +- synapse/visibility.py | 546 ++++++++++++++++++++++++++++----------- 3 files changed, 411 insertions(+), 149 deletions(-) create mode 100644 changelog.d/13222.misc diff --git a/changelog.d/13222.misc b/changelog.d/13222.misc new file mode 100644 index 0000000000..0bab1aed70 --- /dev/null +++ b/changelog.d/13222.misc @@ -0,0 +1 @@ +Improve memory usage of calculating push actions for events in large rooms. 
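The new helper checks visibility of a single event for a whole collection of users in one pass, instead of running the per-user `filter_events_for_client` path for each recipient; per the changelog above, the intended beneficiary is the push-action calculation, whose call site is not part of this patch. A minimal, hypothetical usage sketch (only the imports and the helper's signature are taken from the diff below; the wrapper function and its name are made up for illustration):

```python
from typing import Collection

from synapse.events import EventBase
from synapse.events.snapshot import EventContext
from synapse.storage.databases.main import DataStore
from synapse.visibility import filter_event_for_clients_with_state


async def users_who_can_see(
    store: DataStore,
    user_ids: Collection[str],
    event: EventBase,
    context: EventContext,
) -> Collection[str]:
    """Hypothetical caller: returns the subset of `user_ids` that may see `event`.

    Unlike `filter_events_for_client`, the helper returns user IDs rather than
    (possibly redacted) events, and it does not check whether the sender was
    erased.
    """
    return await filter_event_for_clients_with_state(
        store, user_ids, event, context
    )
```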
diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 561621a285..87f82bd9a5 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -39,6 +39,7 @@ from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.server import HomeServer +from synapse.storage.database import DatabasePool, LoggingDatabaseConnection from synapse.storage.databases.main.room import RoomWorkerStore from synapse.types import StateMap from synapse.util import SYNAPSE_VERSION @@ -60,7 +61,17 @@ class AdminCmdSlavedStore( BaseSlavedStore, RoomWorkerStore, ): - pass + def __init__( + self, + database: DatabasePool, + db_conn: LoggingDatabaseConnection, + hs: "HomeServer", + ): + super().__init__(database, db_conn, hs) + + # Annoyingly `filter_events_for_client` assumes that this exists. We + # should refactor it to take a `Clock` directly. + self.clock = hs.get_clock() class AdminCmdServer(HomeServer): diff --git a/synapse/visibility.py b/synapse/visibility.py index 8aaa8c709f..9abbaa5a64 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -13,16 +13,21 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging +from enum import Enum, auto from typing import Collection, Dict, FrozenSet, List, Optional, Tuple +import attr from typing_extensions import Final from synapse.api.constants import EventTypes, HistoryVisibility, Membership from synapse.events import EventBase +from synapse.events.snapshot import EventContext from synapse.events.utils import prune_event from synapse.storage.controllers import StorageControllers +from synapse.storage.databases.main import DataStore from synapse.storage.state import StateFilter from synapse.types import RetentionPolicy, StateMap, get_domain_from_id +from synapse.util import Clock logger = logging.getLogger(__name__) @@ -102,153 +107,18 @@ async def filter_events_for_client( ] = await storage.main.get_retention_policy_for_room(room_id) def allowed(event: EventBase) -> Optional[EventBase]: - """ - Args: - event: event to check - - Returns: - None if the user cannot see this event at all - - a redacted copy of the event if they can only see a redacted - version - - the original event if they can see it as normal. - """ - # Only run some checks if these events aren't about to be sent to clients. This is - # because, if this is not the case, we're probably only checking if the users can - # see events in the room at that point in the DAG, and that shouldn't be decided - # on those checks. - if filter_send_to_client: - if event.type == EventTypes.Dummy: - return None - - if not event.is_state() and event.sender in ignore_list: - return None - - # Until MSC2261 has landed we can't redact malicious alias events, so for - # now we temporarily filter out m.room.aliases entirely to mitigate - # abuse, while we spec a better solution to advertising aliases - # on rooms. - if event.type == EventTypes.Aliases: - return None - - # Don't try to apply the room's retention policy if the event is a state - # event, as MSC1763 states that retention is only considered for non-state - # events. 
- if not event.is_state(): - retention_policy = retention_policies[event.room_id] - max_lifetime = retention_policy.max_lifetime - - if max_lifetime is not None: - oldest_allowed_ts = storage.main.clock.time_msec() - max_lifetime - - if event.origin_server_ts < oldest_allowed_ts: - return None - - if event.event_id in always_include_ids: - return event - - # we need to handle outliers separately, since we don't have the room state. - if event.internal_metadata.outlier: - # Normally these can't be seen by clients, but we make an exception for - # for out-of-band membership events (eg, incoming invites, or rejections of - # said invite) for the user themselves. - if event.type == EventTypes.Member and event.state_key == user_id: - logger.debug("Returning out-of-band-membership event %s", event) - return event - - return None - - state = event_id_to_state[event.event_id] - - # get the room_visibility at the time of the event. - visibility = get_effective_room_visibility_from_state(state) - - # Always allow history visibility events on boundaries. This is done - # by setting the effective visibility to the least restrictive - # of the old vs new. - if event.type == EventTypes.RoomHistoryVisibility: - prev_content = event.unsigned.get("prev_content", {}) - prev_visibility = prev_content.get("history_visibility", None) - - if prev_visibility not in VISIBILITY_PRIORITY: - prev_visibility = HistoryVisibility.SHARED - - new_priority = VISIBILITY_PRIORITY.index(visibility) - old_priority = VISIBILITY_PRIORITY.index(prev_visibility) - if old_priority < new_priority: - visibility = prev_visibility - - # likewise, if the event is the user's own membership event, use - # the 'most joined' membership - membership = None - if event.type == EventTypes.Member and event.state_key == user_id: - membership = event.content.get("membership", None) - if membership not in MEMBERSHIP_PRIORITY: - membership = "leave" - - prev_content = event.unsigned.get("prev_content", {}) - prev_membership = prev_content.get("membership", None) - if prev_membership not in MEMBERSHIP_PRIORITY: - prev_membership = "leave" - - # Always allow the user to see their own leave events, otherwise - # they won't see the room disappear if they reject the invite - # - # (Note this doesn't work for out-of-band invite rejections, which don't - # have prev_state populated. They are handled above in the outlier code.) - if membership == "leave" and ( - prev_membership == "join" or prev_membership == "invite" - ): - return event - - new_priority = MEMBERSHIP_PRIORITY.index(membership) - old_priority = MEMBERSHIP_PRIORITY.index(prev_membership) - if old_priority < new_priority: - membership = prev_membership - - # otherwise, get the user's membership at the time of the event. - if membership is None: - membership_event = state.get((EventTypes.Member, user_id), None) - if membership_event: - membership = membership_event.membership - - # if the user was a member of the room at the time of the event, - # they can see it. - if membership == Membership.JOIN: - return event - - # otherwise, it depends on the room visibility. - - if visibility == HistoryVisibility.JOINED: - # we weren't a member at the time of the event, so we can't - # see this event. - return None - - elif visibility == HistoryVisibility.INVITED: - # user can also see the event if they were *invited* at the time - # of the event. 
- return event if membership == Membership.INVITE else None - - elif visibility == HistoryVisibility.SHARED and is_peeking: - # if the visibility is shared, users cannot see the event unless - # they have *subsequently* joined the room (or were members at the - # time, of course) - # - # XXX: if the user has subsequently joined and then left again, - # ideally we would share history up to the point they left. But - # we don't know when they left. We just treat it as though they - # never joined, and restrict access. - return None - - # the visibility is either shared or world_readable, and the user was - # not a member at the time. We allow it, provided the original sender - # has not requested their data to be erased, in which case, we return - # a redacted version. - if erased_senders[event.sender]: - return prune_event(event) - - return event + return _check_client_allowed_to_see_event( + user_id=user_id, + event=event, + clock=storage.main.clock, + filter_send_to_client=filter_send_to_client, + sender_ignored=event.sender in ignore_list, + always_include_ids=always_include_ids, + retention_policy=retention_policies[room_id], + state=event_id_to_state.get(event.event_id), + is_peeking=is_peeking, + sender_erased=erased_senders.get(event.sender, False), + ) # Check each event: gives an iterable of None or (a potentially modified) # EventBase. @@ -258,9 +128,389 @@ async def filter_events_for_client( return [ev for ev in filtered_events if ev] +async def filter_event_for_clients_with_state( + store: DataStore, + user_ids: Collection[str], + event: EventBase, + context: EventContext, + is_peeking: bool = False, + filter_send_to_client: bool = True, +) -> Collection[str]: + """ + Checks to see if an event is visible to the users in the list at the time of + the event. + + Note: This does *not* check if the sender of the event was erased. + + Args: + store: databases + user_ids: user_ids to be checked + event: the event to be checked + context: EventContext for the event to be checked + is_peeking: Whether the users are peeking into the room, ie not + currently joined + filter_send_to_client: Whether we're checking an event that's going to be + sent to a client. This might not always be the case since this function can + also be called to check whether a user can see the state at a given point. + + Returns: + Collection of user IDs for whom the event is visible + """ + # None of the users should see the event if it is soft_failed + if event.internal_metadata.is_soft_failed(): + return [] + + # Make a set for all user IDs that haven't been filtered out by a check. + allowed_user_ids = set(user_ids) + + # Only run some checks if these events aren't about to be sent to clients. This is + # because, if this is not the case, we're probably only checking if the users can + # see events in the room at that point in the DAG, and that shouldn't be decided + # on those checks. + if filter_send_to_client: + ignored_by = await store.ignored_by(event.sender) + retention_policy = await store.get_retention_policy_for_room(event.room_id) + + for user_id in user_ids: + if ( + _check_filter_send_to_client( + event, + store.clock, + retention_policy, + sender_ignored=user_id in ignored_by, + ) + == _CheckFilter.DENIED + ): + allowed_user_ids.discard(user_id) + + if event.internal_metadata.outlier: + # Normally these can't be seen by clients, but we make an exception for + # for out-of-band membership events (eg, incoming invites, or rejections of + # said invite) for the user themselves. 
+ if event.type == EventTypes.Member and event.state_key in allowed_user_ids: + logger.debug("Returning out-of-band-membership event %s", event) + return {event.state_key} + + return set() + + # First we get just the history visibility in case its shared/world-readable + # room. + visibility_state_map = await _get_state_map( + store, event, context, StateFilter.from_types([_HISTORY_VIS_KEY]) + ) + + visibility = get_effective_room_visibility_from_state(visibility_state_map) + if ( + _check_history_visibility(event, visibility, is_peeking=is_peeking) + == _CheckVisibility.ALLOWED + ): + return allowed_user_ids + + # The history visibility isn't lax, so we now need to fetch the membership + # events of all the users. + + filter_list = [] + for user_id in allowed_user_ids: + filter_list.append((EventTypes.Member, user_id)) + filter_list.append((EventTypes.RoomHistoryVisibility, "")) + + state_filter = StateFilter.from_types(filter_list) + state_map = await _get_state_map(store, event, context, state_filter) + + # Now we check whether the membership allows each user to see the event. + return { + user_id + for user_id in allowed_user_ids + if _check_membership(user_id, event, visibility, state_map, is_peeking).allowed + } + + +async def _get_state_map( + store: DataStore, event: EventBase, context: EventContext, state_filter: StateFilter +) -> StateMap[EventBase]: + """Helper function for getting a `StateMap[EventBase]` from an `EventContext`""" + state_map = await context.get_prev_state_ids(state_filter) + + # Use events rather than event ids as content from the events are needed in + # _check_visibility + event_map = await store.get_events(state_map.values(), get_prev_content=False) + + updated_state_map = {} + for state_key, event_id in state_map.items(): + state_event = event_map.get(event_id) + if state_event: + updated_state_map[state_key] = state_event + + if event.is_state(): + current_state_key = (event.type, event.state_key) + # Add current event to updated_state_map, we need to do this here as it + # may not have been persisted to the db yet + updated_state_map[current_state_key] = event + + return updated_state_map + + +def _check_client_allowed_to_see_event( + user_id: str, + event: EventBase, + clock: Clock, + filter_send_to_client: bool, + is_peeking: bool, + always_include_ids: FrozenSet[str], + sender_ignored: bool, + retention_policy: RetentionPolicy, + state: Optional[StateMap[EventBase]], + sender_erased: bool, +) -> Optional[EventBase]: + """Check with the given user is allowed to see the given event + + See `filter_events_for_client` for details about args + + Args: + user_id + event + clock + filter_send_to_client + is_peeking + always_include_ids + sender_ignored: Whether the user is ignoring the event sender + retention_policy: The retention policy of the room + state: The state at the event, unless its an outlier + sender_erased: Whether the event sender has been marked as "erased" + + Returns: + None if the user cannot see this event at all + + a redacted copy of the event if they can only see a redacted + version + + the original event if they can see it as normal. + """ + # Only run some checks if these events aren't about to be sent to clients. This is + # because, if this is not the case, we're probably only checking if the users can + # see events in the room at that point in the DAG, and that shouldn't be decided + # on those checks. 
+ if filter_send_to_client: + if ( + _check_filter_send_to_client(event, clock, retention_policy, sender_ignored) + == _CheckFilter.DENIED + ): + return None + + if event.event_id in always_include_ids: + return event + + # we need to handle outliers separately, since we don't have the room state. + if event.internal_metadata.outlier: + # Normally these can't be seen by clients, but we make an exception for + # for out-of-band membership events (eg, incoming invites, or rejections of + # said invite) for the user themselves. + if event.type == EventTypes.Member and event.state_key == user_id: + logger.debug("Returning out-of-band-membership event %s", event) + return event + + return None + + if state is None: + raise Exception("Missing state for non-outlier event") + + # get the room_visibility at the time of the event. + visibility = get_effective_room_visibility_from_state(state) + + # Check if the room has lax history visibility, allowing us to skip + # membership checks. + # + # We can only do this check if the sender has *not* been erased, as if they + # have we need to check the user's membership. + if ( + not sender_erased + and _check_history_visibility(event, visibility, is_peeking) + == _CheckVisibility.ALLOWED + ): + return event + + membership_result = _check_membership(user_id, event, visibility, state, is_peeking) + if not membership_result.allowed: + return None + + # If the sender has been erased and the user was not joined at the time, we + # must only return the redacted form. + if sender_erased and not membership_result.joined: + event = prune_event(event) + + return event + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class _CheckMembershipReturn: + "Return value of _check_membership" + allowed: bool + joined: bool + + +def _check_membership( + user_id: str, + event: EventBase, + visibility: str, + state: StateMap[EventBase], + is_peeking: bool, +) -> _CheckMembershipReturn: + """Check whether the user can see the event due to their membership + + Returns: + True if they can, False if they can't, plus the membership of the user + at the event. + """ + # If the event is the user's own membership event, use the 'most joined' + # membership + membership = None + if event.type == EventTypes.Member and event.state_key == user_id: + membership = event.content.get("membership", None) + if membership not in MEMBERSHIP_PRIORITY: + membership = "leave" + + prev_content = event.unsigned.get("prev_content", {}) + prev_membership = prev_content.get("membership", None) + if prev_membership not in MEMBERSHIP_PRIORITY: + prev_membership = "leave" + + # Always allow the user to see their own leave events, otherwise + # they won't see the room disappear if they reject the invite + # + # (Note this doesn't work for out-of-band invite rejections, which don't + # have prev_state populated. They are handled above in the outlier code.) + if membership == "leave" and ( + prev_membership == "join" or prev_membership == "invite" + ): + return _CheckMembershipReturn(True, membership == Membership.JOIN) + + new_priority = MEMBERSHIP_PRIORITY.index(membership) + old_priority = MEMBERSHIP_PRIORITY.index(prev_membership) + if old_priority < new_priority: + membership = prev_membership + + # otherwise, get the user's membership at the time of the event. 
+ if membership is None: + membership_event = state.get((EventTypes.Member, user_id), None) + if membership_event: + membership = membership_event.membership + + # if the user was a member of the room at the time of the event, + # they can see it. + if membership == Membership.JOIN: + return _CheckMembershipReturn(True, True) + + # otherwise, it depends on the room visibility. + + if visibility == HistoryVisibility.JOINED: + # we weren't a member at the time of the event, so we can't + # see this event. + return _CheckMembershipReturn(False, False) + + elif visibility == HistoryVisibility.INVITED: + # user can also see the event if they were *invited* at the time + # of the event. + return _CheckMembershipReturn(membership == Membership.INVITE, False) + + elif visibility == HistoryVisibility.SHARED and is_peeking: + # if the visibility is shared, users cannot see the event unless + # they have *subsequently* joined the room (or were members at the + # time, of course) + # + # XXX: if the user has subsequently joined and then left again, + # ideally we would share history up to the point they left. But + # we don't know when they left. We just treat it as though they + # never joined, and restrict access. + return _CheckMembershipReturn(False, False) + + # The visibility is either shared or world_readable, and the user was + # not a member at the time. We allow it. + return _CheckMembershipReturn(True, False) + + +class _CheckFilter(Enum): + MAYBE_ALLOWED = auto() + DENIED = auto() + + +def _check_filter_send_to_client( + event: EventBase, + clock: Clock, + retention_policy: RetentionPolicy, + sender_ignored: bool, +) -> _CheckFilter: + """Apply checks for sending events to client + + Returns: + True if might be allowed to be sent to clients, False if definitely not. + """ + + if event.type == EventTypes.Dummy: + return _CheckFilter.DENIED + + if not event.is_state() and sender_ignored: + return _CheckFilter.DENIED + + # Until MSC2261 has landed we can't redact malicious alias events, so for + # now we temporarily filter out m.room.aliases entirely to mitigate + # abuse, while we spec a better solution to advertising aliases + # on rooms. + if event.type == EventTypes.Aliases: + return _CheckFilter.DENIED + + # Don't try to apply the room's retention policy if the event is a state + # event, as MSC1763 states that retention is only considered for non-state + # events. + if not event.is_state(): + max_lifetime = retention_policy.max_lifetime + + if max_lifetime is not None: + oldest_allowed_ts = clock.time_msec() - max_lifetime + + if event.origin_server_ts < oldest_allowed_ts: + return _CheckFilter.DENIED + + return _CheckFilter.MAYBE_ALLOWED + + +class _CheckVisibility(Enum): + ALLOWED = auto() + MAYBE_DENIED = auto() + + +def _check_history_visibility( + event: EventBase, visibility: str, is_peeking: bool +) -> _CheckVisibility: + """Check if event is allowed to be seen due to lax history visibility. + + Returns: + True if user can definitely see the event, False if maybe not. + """ + # Always allow history visibility events on boundaries. This is done + # by setting the effective visibility to the least restrictive + # of the old vs new. 
+ if event.type == EventTypes.RoomHistoryVisibility: + prev_content = event.unsigned.get("prev_content", {}) + prev_visibility = prev_content.get("history_visibility", None) + + if prev_visibility not in VISIBILITY_PRIORITY: + prev_visibility = HistoryVisibility.SHARED + + new_priority = VISIBILITY_PRIORITY.index(visibility) + old_priority = VISIBILITY_PRIORITY.index(prev_visibility) + if old_priority < new_priority: + visibility = prev_visibility + + if visibility == HistoryVisibility.SHARED and not is_peeking: + return _CheckVisibility.ALLOWED + elif visibility == HistoryVisibility.WORLD_READABLE: + return _CheckVisibility.ALLOWED + + return _CheckVisibility.MAYBE_DENIED + + def get_effective_room_visibility_from_state(state: StateMap[EventBase]) -> str: """Get the actual history vis, from a state map including the history_visibility event - Handles missing and invalid history visibility events. """ visibility_event = state.get(_HISTORY_VIS_KEY, None) From 5ef2f875699da76e7070593418b066f5c293a12a Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 11 Jul 2022 15:05:24 +0100 Subject: [PATCH 093/178] Document the 'databases' homeserver config option (#13212) --- changelog.d/13212.doc | 1 + .../configuration/config_documentation.md | 92 +++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 changelog.d/13212.doc diff --git a/changelog.d/13212.doc b/changelog.d/13212.doc new file mode 100644 index 0000000000..e6b65d826f --- /dev/null +++ b/changelog.d/13212.doc @@ -0,0 +1 @@ +Add documentation for the existing `databases` option in the homeserver configuration manual. diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index ef411c5356..5deabb53d7 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1257,6 +1257,98 @@ database: cp_max: 10 ``` --- +### `databases` + +The `databases` option allows specifying a mapping between certain database tables and +database host details, spreading the load of a single Synapse instance across multiple +database backends. This is often referred to as "database sharding". This option is only +supported for PostgreSQL database backends. + +**Important note:** This is a supported option, but is not currently used in production by the +Matrix.org Foundation. Proceed with caution and always make backups. + +`databases` is a dictionary of arbitrarily-named database entries. Each entry is equivalent +to the value of the `database` homeserver config option (see above), with the addition of +a `data_stores` key. `data_stores` is an array of strings that specifies the data store(s) +(a defined label for a set of tables) that should be stored on the associated database +backend entry. + +The currently defined values for `data_stores` are: + +* `"state"`: Database that relates to state groups will be stored in this database. + + Specifically, that means the following tables: + * `state_groups` + * `state_group_edges` + * `state_groups_state` + + And the following sequences: + * `state_groups_seq_id` + +* `"main"`: All other database tables and sequences. + +All databases will end up with additional tables used for tracking database schema migrations +and any pending background updates. Synapse will create these automatically on startup when checking for +and/or performing database schema migrations. + +To migrate an existing database configuration (e.g. 
all tables on a single database) to a different +configuration (e.g. the "main" data store on one database, and "state" on another), do the following: + +1. Take a backup of your existing database. Things can and do go wrong and database corruption is no joke! +2. Ensure all pending database migrations have been applied and background updates have run. The simplest + way to do this is to use the `update_synapse_database` script supplied with your Synapse installation. + + ```sh + update_synapse_database --database-config homeserver.yaml --run-background-updates + ``` + +3. Copy over the necessary tables and sequences from one database to the other. Tables relating to database + migrations, schemas, schema versions and background updates should **not** be copied. + + As an example, say that you'd like to split out the "state" data store from an existing database which + currently contains all data stores. + + Simply copy the tables and sequences defined above for the "state" datastore from the existing database + to the secondary database. As noted above, additional tables will be created in the secondary database + when Synapse is started. + +4. Modify/create the `databases` option in your `homeserver.yaml` to match the desired database configuration. +5. Start Synapse. Check that it starts up successfully and that things generally seem to be working. +6. Drop the old tables that were copied in step 3. + +Only one of the options `database` or `databases` may be specified in your config, but not both. + +Example configuration: + +```yaml +databases: + basement_box: + name: psycopg2 + txn_limit: 10000 + data_stores: ["main"] + args: + user: synapse_user + password: secretpassword + database: synapse_main + host: localhost + port: 5432 + cp_min: 5 + cp_max: 10 + + my_other_database: + name: psycopg2 + txn_limit: 10000 + data_stores: ["state"] + args: + user: synapse_user + password: secretpassword + database: synapse_state + host: localhost + port: 5432 + cp_min: 5 + cp_max: 10 +``` +--- ## Logging ## Config options related to logging. From f1711e1f5c40232b5749d9df23b9857b8c1eb661 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 11 Jul 2022 16:51:30 +0100 Subject: [PATCH 094/178] Remove delay when rotating event push actions (#13211) We want to be as up to date as possible, and sleeping doesn't help here and can mean we fall behind. --- changelog.d/13211.misc | 1 + synapse/storage/databases/main/event_push_actions.py | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) create mode 100644 changelog.d/13211.misc diff --git a/changelog.d/13211.misc b/changelog.d/13211.misc new file mode 100644 index 0000000000..4d2a6dec65 --- /dev/null +++ b/changelog.d/13211.misc @@ -0,0 +1 @@ +More aggressively rotate push actions. 
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 1a951ac02a..dd2627037c 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -143,7 +143,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas self._find_stream_orderings_for_times, 10 * 60 * 1000 ) - self._rotate_delay = 3 self._rotate_count = 10000 self._doing_notif_rotation = False if hs.config.worker.run_background_tasks: @@ -847,7 +846,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas ) if caught_up: break - await self.hs.get_clock().sleep(self._rotate_delay) # Finally we clear out old event push actions. await self._remove_old_push_actions_that_have_rotated() @@ -1114,7 +1112,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas ) -> bool: # We don't want to clear out too much at a time, so we bound our # deletes. - batch_size = 10000 + batch_size = self._rotate_count txn.execute( """ From d736d5cfadcc9a56523fcb1cfe8cb1d2be47a4ec Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Mon, 11 Jul 2022 10:22:17 -0600 Subject: [PATCH 095/178] Fix to-device messages not being sent to MSC3202-enabled appservices (#13235) The field name was simply incorrect, leading to errors. --- changelog.d/13235.bugfix | 1 + synapse/appservice/scheduler.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13235.bugfix diff --git a/changelog.d/13235.bugfix b/changelog.d/13235.bugfix new file mode 100644 index 0000000000..5c31fbc775 --- /dev/null +++ b/changelog.d/13235.bugfix @@ -0,0 +1 @@ +Fix MSC3202-enabled appservices not receiving to-device messages, preventing messages from being decrypted. \ No newline at end of file diff --git a/synapse/appservice/scheduler.py b/synapse/appservice/scheduler.py index 6c8695346f..430ffbcd1f 100644 --- a/synapse/appservice/scheduler.py +++ b/synapse/appservice/scheduler.py @@ -331,8 +331,9 @@ class _ServiceQueuer: ) # Add recipients of to-device messages. - # device_message["user_id"] is the ID of the recipient. 
- users.update(device_message["user_id"] for device_message in to_device_messages) + users.update( + device_message["to_user_id"] for device_message in to_device_messages + ) # Compute and return the counts / fallback key usage states otk_counts = await self._store.count_bulk_e2e_one_time_keys_for_as(users) From 11f811470ff94dedc4232072b7f9ff099d4fcbd6 Mon Sep 17 00:00:00 2001 From: David Teller Date: Mon, 11 Jul 2022 18:52:10 +0200 Subject: [PATCH 096/178] Uniformize spam-checker API, part 5: expand other spam-checker callbacks to return `Tuple[Codes, dict]` (#13044) Signed-off-by: David Teller Co-authored-by: Brendan Abolivier --- changelog.d/13044.misc | 1 + synapse/api/errors.py | 10 +- synapse/events/spamcheck.py | 163 ++++++++++++++++----- synapse/handlers/directory.py | 6 +- synapse/handlers/federation.py | 3 +- synapse/handlers/room.py | 12 +- synapse/handlers/room_member.py | 27 +++- synapse/module_api/__init__.py | 1 + synapse/rest/media/v1/media_storage.py | 4 +- tests/rest/client/test_rooms.py | 168 +++++++++++++++++++++- tests/rest/client/utils.py | 21 +++ tests/rest/media/v1/test_media_storage.py | 70 ++++++++- 12 files changed, 426 insertions(+), 60 deletions(-) create mode 100644 changelog.d/13044.misc diff --git a/changelog.d/13044.misc b/changelog.d/13044.misc new file mode 100644 index 0000000000..f9a0669dd3 --- /dev/null +++ b/changelog.d/13044.misc @@ -0,0 +1 @@ +Support temporary experimental return values for spam checker module callbacks. \ No newline at end of file diff --git a/synapse/api/errors.py b/synapse/api/errors.py index cc7b785472..1c74e131f2 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -297,8 +297,14 @@ class AuthError(SynapseError): other poorly-defined times. """ - def __init__(self, code: int, msg: str, errcode: str = Codes.FORBIDDEN): - super().__init__(code, msg, errcode) + def __init__( + self, + code: int, + msg: str, + errcode: str = Codes.FORBIDDEN, + additional_fields: Optional[dict] = None, + ): + super().__init__(code, msg, errcode, additional_fields) class InvalidClientCredentialsError(SynapseError): diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index 32712d2042..4a3bfb38f1 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -21,7 +21,6 @@ from typing import ( Awaitable, Callable, Collection, - Dict, List, Optional, Tuple, @@ -32,10 +31,11 @@ from typing import ( from typing_extensions import Literal import synapse +from synapse.api.errors import Codes from synapse.rest.media.v1._base import FileInfo from synapse.rest.media.v1.media_storage import ReadableFileWrapper from synapse.spam_checker_api import RegistrationBehaviour -from synapse.types import RoomAlias, UserProfile +from synapse.types import JsonDict, RoomAlias, UserProfile from synapse.util.async_helpers import delay_cancellation, maybe_awaitable from synapse.util.metrics import Measure @@ -50,12 +50,12 @@ CHECK_EVENT_FOR_SPAM_CALLBACK = Callable[ Awaitable[ Union[ str, - "synapse.api.errors.Codes", + Codes, # Highly experimental, not officially part of the spamchecker API, may # disappear without warning depending on the results of ongoing # experiments. # Use this to return additional information as part of an error. 
- Tuple["synapse.api.errors.Codes", Dict], + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -70,7 +70,12 @@ USER_MAY_JOIN_ROOM_CALLBACK = Callable[ Awaitable[ Union[ Literal["NOT_SPAM"], - "synapse.api.errors.Codes", + Codes, + # Highly experimental, not officially part of the spamchecker API, may + # disappear without warning depending on the results of ongoing + # experiments. + # Use this to return additional information as part of an error. + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -81,7 +86,12 @@ USER_MAY_INVITE_CALLBACK = Callable[ Awaitable[ Union[ Literal["NOT_SPAM"], - "synapse.api.errors.Codes", + Codes, + # Highly experimental, not officially part of the spamchecker API, may + # disappear without warning depending on the results of ongoing + # experiments. + # Use this to return additional information as part of an error. + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -92,7 +102,12 @@ USER_MAY_SEND_3PID_INVITE_CALLBACK = Callable[ Awaitable[ Union[ Literal["NOT_SPAM"], - "synapse.api.errors.Codes", + Codes, + # Highly experimental, not officially part of the spamchecker API, may + # disappear without warning depending on the results of ongoing + # experiments. + # Use this to return additional information as part of an error. + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -103,7 +118,12 @@ USER_MAY_CREATE_ROOM_CALLBACK = Callable[ Awaitable[ Union[ Literal["NOT_SPAM"], - "synapse.api.errors.Codes", + Codes, + # Highly experimental, not officially part of the spamchecker API, may + # disappear without warning depending on the results of ongoing + # experiments. + # Use this to return additional information as part of an error. + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -114,7 +134,12 @@ USER_MAY_CREATE_ROOM_ALIAS_CALLBACK = Callable[ Awaitable[ Union[ Literal["NOT_SPAM"], - "synapse.api.errors.Codes", + Codes, + # Highly experimental, not officially part of the spamchecker API, may + # disappear without warning depending on the results of ongoing + # experiments. + # Use this to return additional information as part of an error. + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -125,7 +150,12 @@ USER_MAY_PUBLISH_ROOM_CALLBACK = Callable[ Awaitable[ Union[ Literal["NOT_SPAM"], - "synapse.api.errors.Codes", + Codes, + # Highly experimental, not officially part of the spamchecker API, may + # disappear without warning depending on the results of ongoing + # experiments. + # Use this to return additional information as part of an error. + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -154,7 +184,12 @@ CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK = Callable[ Awaitable[ Union[ Literal["NOT_SPAM"], - "synapse.api.errors.Codes", + Codes, + # Highly experimental, not officially part of the spamchecker API, may + # disappear without warning depending on the results of ongoing + # experiments. + # Use this to return additional information as part of an error. + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -345,7 +380,7 @@ class SpamChecker: async def check_event_for_spam( self, event: "synapse.events.EventBase" - ) -> Union[Tuple["synapse.api.errors.Codes", Dict], str]: + ) -> Union[Tuple[Codes, JsonDict], str]: """Checks if a given event is considered "spammy" by this server. 
If the server considers an event spammy, then it will be rejected if @@ -376,7 +411,16 @@ class SpamChecker: elif res is True: # This spam-checker rejects the event with deprecated # return value `True` - return (synapse.api.errors.Codes.FORBIDDEN, {}) + return synapse.api.errors.Codes.FORBIDDEN, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): + return res + elif isinstance(res, synapse.api.errors.Codes): + return res, {} elif not isinstance(res, str): # mypy complains that we can't reach this code because of the # return type in CHECK_EVENT_FOR_SPAM_CALLBACK, but we don't know @@ -422,7 +466,7 @@ class SpamChecker: async def user_may_join_room( self, user_id: str, room_id: str, is_invited: bool - ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: + ) -> Union[Tuple[Codes, JsonDict], Literal["NOT_SPAM"]]: """Checks if a given users is allowed to join a room. Not called when a user creates a room. @@ -432,7 +476,7 @@ class SpamChecker: is_invited: Whether the user is invited into the room Returns: - NOT_SPAM if the operation is permitted, Codes otherwise. + NOT_SPAM if the operation is permitted, [Codes, Dict] otherwise. """ for callback in self._user_may_join_room_callbacks: with Measure( @@ -443,21 +487,28 @@ class SpamChecker: if res is True or res is self.NOT_SPAM: continue elif res is False: - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} elif isinstance(res, synapse.api.errors.Codes): + return res, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): return res else: logger.warning( "Module returned invalid value, rejecting join as spam" ) - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} # No spam-checker has rejected the request, let it pass. return self.NOT_SPAM async def user_may_invite( self, inviter_userid: str, invitee_userid: str, room_id: str - ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: + ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]: """Checks if a given user may send an invite Args: @@ -479,21 +530,28 @@ class SpamChecker: if res is True or res is self.NOT_SPAM: continue elif res is False: - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} elif isinstance(res, synapse.api.errors.Codes): + return res, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): return res else: logger.warning( "Module returned invalid value, rejecting invite as spam" ) - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} # No spam-checker has rejected the request, let it pass. 
return self.NOT_SPAM async def user_may_send_3pid_invite( self, inviter_userid: str, medium: str, address: str, room_id: str - ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: + ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]: """Checks if a given user may invite a given threepid into the room Note that if the threepid is already associated with a Matrix user ID, Synapse @@ -519,20 +577,27 @@ class SpamChecker: if res is True or res is self.NOT_SPAM: continue elif res is False: - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} elif isinstance(res, synapse.api.errors.Codes): + return res, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): return res else: logger.warning( "Module returned invalid value, rejecting 3pid invite as spam" ) - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} return self.NOT_SPAM async def user_may_create_room( self, userid: str - ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: + ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]: """Checks if a given user may create a room Args: @@ -546,20 +611,27 @@ class SpamChecker: if res is True or res is self.NOT_SPAM: continue elif res is False: - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} elif isinstance(res, synapse.api.errors.Codes): + return res, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): return res else: logger.warning( "Module returned invalid value, rejecting room creation as spam" ) - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} return self.NOT_SPAM async def user_may_create_room_alias( self, userid: str, room_alias: RoomAlias - ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: + ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]: """Checks if a given user may create a room alias Args: @@ -575,20 +647,27 @@ class SpamChecker: if res is True or res is self.NOT_SPAM: continue elif res is False: - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} elif isinstance(res, synapse.api.errors.Codes): + return res, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): return res else: logger.warning( "Module returned invalid value, rejecting room create as spam" ) - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} return self.NOT_SPAM async def user_may_publish_room( self, userid: str, room_id: str - ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: + ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]: """Checks if a given user may publish a room to the directory Args: @@ -603,14 +682,21 @@ class SpamChecker: if res is True or res is self.NOT_SPAM: continue elif res is False: - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} elif isinstance(res, synapse.api.errors.Codes): + return res, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): return res else: logger.warning( "Module returned invalid value, rejecting room publication as spam" ) - return synapse.api.errors.Codes.FORBIDDEN + return 
synapse.api.errors.Codes.FORBIDDEN, {} return self.NOT_SPAM @@ -678,7 +764,7 @@ class SpamChecker: async def check_media_file_for_spam( self, file_wrapper: ReadableFileWrapper, file_info: FileInfo - ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: + ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]: """Checks if a piece of newly uploaded media should be blocked. This will be called for local uploads, downloads of remote media, each @@ -715,13 +801,20 @@ class SpamChecker: if res is False or res is self.NOT_SPAM: continue elif res is True: - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} elif isinstance(res, synapse.api.errors.Codes): + return res, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): return res else: logger.warning( "Module returned invalid value, rejecting media file as spam" ) - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} return self.NOT_SPAM diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 8b0f16f965..09a7a4b238 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -149,7 +149,8 @@ class DirectoryHandler: raise AuthError( 403, "This user is not permitted to create this alias", - spam_check, + errcode=spam_check[0], + additional_fields=spam_check[1], ) if not self.config.roomdirectory.is_alias_creation_allowed( @@ -441,7 +442,8 @@ class DirectoryHandler: raise AuthError( 403, "This user is not permitted to publish rooms to the room list", - spam_check, + errcode=spam_check[0], + additional_fields=spam_check[1], ) if requester.is_guest: diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index e2564e9340..3b5eaf5156 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -844,7 +844,8 @@ class FederationHandler: raise SynapseError( 403, "This user is not permitted to send invites to this server/user", - spam_check, + errcode=spam_check[0], + additional_fields=spam_check[1], ) membership = event.content.get("membership") diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 44f8084579..8dd94cbc76 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -440,7 +440,12 @@ class RoomCreationHandler: spam_check = await self.spam_checker.user_may_create_room(user_id) if spam_check != NOT_SPAM: - raise SynapseError(403, "You are not permitted to create rooms", spam_check) + raise SynapseError( + 403, + "You are not permitted to create rooms", + errcode=spam_check[0], + additional_fields=spam_check[1], + ) creation_content: JsonDict = { "room_version": new_room_version.identifier, @@ -731,7 +736,10 @@ class RoomCreationHandler: spam_check = await self.spam_checker.user_may_create_room(user_id) if spam_check != NOT_SPAM: raise SynapseError( - 403, "You are not permitted to create rooms", spam_check + 403, + "You are not permitted to create rooms", + errcode=spam_check[0], + additional_fields=spam_check[1], ) if ratelimit: diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index a1d8875dd8..04c44b2ccb 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -685,7 +685,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): if target_id == self._server_notices_mxid: raise SynapseError(HTTPStatus.FORBIDDEN, "Cannot invite this user") - block_invite_code = None + block_invite_result = None if ( 
self._server_notices_mxid is not None @@ -703,18 +703,21 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): "Blocking invite: user is not admin and non-admin " "invites disabled" ) - block_invite_code = Codes.FORBIDDEN + block_invite_result = (Codes.FORBIDDEN, {}) spam_check = await self.spam_checker.user_may_invite( requester.user.to_string(), target_id, room_id ) if spam_check != NOT_SPAM: logger.info("Blocking invite due to spam checker") - block_invite_code = spam_check + block_invite_result = spam_check - if block_invite_code is not None: + if block_invite_result is not None: raise SynapseError( - 403, "Invites have been disabled on this server", block_invite_code + 403, + "Invites have been disabled on this server", + errcode=block_invite_result[0], + additional_fields=block_invite_result[1], ) # An empty prev_events list is allowed as long as the auth_event_ids are present @@ -828,7 +831,12 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): target.to_string(), room_id, is_invited=inviter is not None ) if spam_check != NOT_SPAM: - raise SynapseError(403, "Not allowed to join this room", spam_check) + raise SynapseError( + 403, + "Not allowed to join this room", + errcode=spam_check[0], + additional_fields=spam_check[1], + ) # Check if a remote join should be performed. remote_join, remote_room_hosts = await self._should_perform_remote_join( @@ -1387,7 +1395,12 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): room_id=room_id, ) if spam_check != NOT_SPAM: - raise SynapseError(403, "Cannot send threepid invite", spam_check) + raise SynapseError( + 403, + "Cannot send threepid invite", + errcode=spam_check[0], + additional_fields=spam_check[1], + ) stream_id = await self._make_and_store_3pid_invite( requester, diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 6191c2dc96..6d8bf54083 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -35,6 +35,7 @@ from typing_extensions import ParamSpec from twisted.internet import defer from twisted.web.resource import Resource +from synapse.api import errors from synapse.api.errors import SynapseError from synapse.events import EventBase from synapse.events.presence_router import ( diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 9137417342..a5c3de192f 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -154,7 +154,9 @@ class MediaStorage: # Note that we'll delete the stored media, due to the # try/except below. The media also won't be stored in # the DB. - raise SpamMediaException(errcode=spam_check) + # We currently ignore any additional field returned by + # the spam-check API. + raise SpamMediaException(errcode=spam_check[0]) for provider in self.storage_providers: await provider.store_file(path, file_info) diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 1ccd96a207..e67844cfa1 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -22,7 +22,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple, Union from unittest.mock import Mock, call from urllib import parse as urlparse -# `Literal` appears with Python 3.8. 
+from parameterized import param, parameterized from typing_extensions import Literal from twisted.test.proto_helpers import MemoryReactor @@ -815,14 +815,14 @@ class RoomsCreateTestCase(RoomBase): In this test, we use the more recent API in which callbacks return a `Union[Codes, Literal["NOT_SPAM"]]`. """ - async def user_may_join_room( + async def user_may_join_room_codes( mxid: str, room_id: str, is_invite: bool, ) -> Codes: return Codes.CONSENT_NOT_GIVEN - join_mock = Mock(side_effect=user_may_join_room) + join_mock = Mock(side_effect=user_may_join_room_codes) self.hs.get_spam_checker()._user_may_join_room_callbacks.append(join_mock) channel = self.make_request( @@ -834,6 +834,25 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(join_mock.call_count, 0) + # Now change the return value of the callback to deny any join. Since we're + # creating the room, despite the return value, we should be able to join. + async def user_may_join_room_tuple( + mxid: str, + room_id: str, + is_invite: bool, + ) -> Tuple[Codes, dict]: + return Codes.INCOMPATIBLE_ROOM_VERSION, {} + + join_mock.side_effect = user_may_join_room_tuple + + channel = self.make_request( + "POST", + "/createRoom", + {}, + ) + self.assertEqual(channel.code, 200, channel.json_body) + self.assertEqual(join_mock.call_count, 0) + class RoomTopicTestCase(RoomBase): """Tests /rooms/$room_id/topic REST events.""" @@ -1113,13 +1132,15 @@ class RoomJoinTestCase(RoomBase): """ # Register a dummy callback. Make it allow all room joins for now. - return_value: Union[Literal["NOT_SPAM"], Codes] = synapse.module_api.NOT_SPAM + return_value: Union[ + Literal["NOT_SPAM"], Tuple[Codes, dict], Codes + ] = synapse.module_api.NOT_SPAM async def user_may_join_room( userid: str, room_id: str, is_invited: bool, - ) -> Union[Literal["NOT_SPAM"], Codes]: + ) -> Union[Literal["NOT_SPAM"], Tuple[Codes, dict], Codes]: return return_value # `spec` argument is needed for this function mock to have `__qualname__`, which @@ -1163,8 +1184,28 @@ class RoomJoinTestCase(RoomBase): ) # Now make the callback deny all room joins, and check that a join actually fails. + # We pick an arbitrary Codes rather than the default `Codes.FORBIDDEN`. return_value = Codes.CONSENT_NOT_GIVEN - self.helper.join(self.room3, self.user2, expect_code=403, tok=self.tok2) + self.helper.invite(self.room3, self.user1, self.user2, tok=self.tok1) + self.helper.join( + self.room3, + self.user2, + expect_code=403, + expect_errcode=return_value, + tok=self.tok2, + ) + + # Now make the callback deny all room joins, and check that a join actually fails. + # As above, with the experimental extension that lets us return dictionaries. 
+ return_value = (Codes.BAD_ALIAS, {"another_field": "12345"}) + self.helper.join( + self.room3, + self.user2, + expect_code=403, + expect_errcode=return_value[0], + tok=self.tok2, + expect_additional_fields=return_value[1], + ) class RoomJoinRatelimitTestCase(RoomBase): @@ -1314,6 +1355,97 @@ class RoomMessagesTestCase(RoomBase): channel = self.make_request("PUT", path, content) self.assertEqual(200, channel.code, msg=channel.result["body"]) + @parameterized.expand( + [ + # Allow + param( + name="NOT_SPAM", value="NOT_SPAM", expected_code=200, expected_fields={} + ), + param(name="False", value=False, expected_code=200, expected_fields={}), + # Block + param( + name="scalene string", + value="ANY OTHER STRING", + expected_code=403, + expected_fields={"errcode": "M_FORBIDDEN"}, + ), + param( + name="True", + value=True, + expected_code=403, + expected_fields={"errcode": "M_FORBIDDEN"}, + ), + param( + name="Code", + value=Codes.LIMIT_EXCEEDED, + expected_code=403, + expected_fields={"errcode": "M_LIMIT_EXCEEDED"}, + ), + param( + name="Tuple", + value=(Codes.SERVER_NOT_TRUSTED, {"additional_field": "12345"}), + expected_code=403, + expected_fields={ + "errcode": "M_SERVER_NOT_TRUSTED", + "additional_field": "12345", + }, + ), + ] + ) + def test_spam_checker_check_event_for_spam( + self, + name: str, + value: Union[str, bool, Codes, Tuple[Codes, JsonDict]], + expected_code: int, + expected_fields: dict, + ) -> None: + class SpamCheck: + mock_return_value: Union[ + str, bool, Codes, Tuple[Codes, JsonDict], bool + ] = "NOT_SPAM" + mock_content: Optional[JsonDict] = None + + async def check_event_for_spam( + self, + event: synapse.events.EventBase, + ) -> Union[str, Codes, Tuple[Codes, JsonDict], bool]: + self.mock_content = event.content + return self.mock_return_value + + spam_checker = SpamCheck() + + self.hs.get_spam_checker()._check_event_for_spam_callbacks.append( + spam_checker.check_event_for_spam + ) + + # Inject `value` as mock_return_value + spam_checker.mock_return_value = value + path = "/rooms/%s/send/m.room.message/check_event_for_spam_%s" % ( + urlparse.quote(self.room_id), + urlparse.quote(name), + ) + body = "test-%s" % name + content = '{"body":"%s","msgtype":"m.text"}' % body + channel = self.make_request("PUT", path, content) + + # Check that the callback has witnessed the correct event. + self.assertIsNotNone(spam_checker.mock_content) + if ( + spam_checker.mock_content is not None + ): # Checked just above, but mypy doesn't know about that. + self.assertEqual( + spam_checker.mock_content["body"], body, spam_checker.mock_content + ) + + # Check that we have the correct result. + self.assertEqual(expected_code, channel.code, msg=channel.result["body"]) + for expected_key, expected_value in expected_fields.items(): + self.assertEqual( + channel.json_body.get(expected_key, None), + expected_value, + "Field %s absent or invalid " % expected_key, + ) + class RoomPowerLevelOverridesTestCase(RoomBase): """Tests that the power levels can be overridden with server config.""" @@ -3235,7 +3367,8 @@ class ThreepidInviteTestCase(unittest.HomeserverTestCase): make_invite_mock.assert_called_once() # Now change the return value of the callback to deny any invite and test that - # we can't send the invite. + # we can't send the invite. 
We pick an arbitrary error code to be able to check + # that the same code has been returned mock.return_value = make_awaitable(Codes.CONSENT_NOT_GIVEN) channel = self.make_request( method="POST", @@ -3249,6 +3382,27 @@ class ThreepidInviteTestCase(unittest.HomeserverTestCase): access_token=self.tok, ) self.assertEqual(channel.code, 403) + self.assertEqual(channel.json_body["errcode"], Codes.CONSENT_NOT_GIVEN) + + # Also check that it stopped before calling _make_and_store_3pid_invite. + make_invite_mock.assert_called_once() + + # Run variant with `Tuple[Codes, dict]`. + mock.return_value = make_awaitable((Codes.EXPIRED_ACCOUNT, {"field": "value"})) + channel = self.make_request( + method="POST", + path="/rooms/" + self.room_id + "/invite", + content={ + "id_server": "example.com", + "id_access_token": "sometoken", + "medium": "email", + "address": email_to_invite, + }, + access_token=self.tok, + ) + self.assertEqual(channel.code, 403) + self.assertEqual(channel.json_body["errcode"], Codes.EXPIRED_ACCOUNT) + self.assertEqual(channel.json_body["field"], "value") # Also check that it stopped before calling _make_and_store_3pid_invite. make_invite_mock.assert_called_once() diff --git a/tests/rest/client/utils.py b/tests/rest/client/utils.py index a0788b1bb0..93f749744d 100644 --- a/tests/rest/client/utils.py +++ b/tests/rest/client/utils.py @@ -41,6 +41,7 @@ from twisted.web.resource import Resource from twisted.web.server import Site from synapse.api.constants import Membership +from synapse.api.errors import Codes from synapse.server import HomeServer from synapse.types import JsonDict @@ -171,6 +172,8 @@ class RestHelper: expect_code: int = HTTPStatus.OK, tok: Optional[str] = None, appservice_user_id: Optional[str] = None, + expect_errcode: Optional[Codes] = None, + expect_additional_fields: Optional[dict] = None, ) -> None: self.change_membership( room=room, @@ -180,6 +183,8 @@ class RestHelper: appservice_user_id=appservice_user_id, membership=Membership.JOIN, expect_code=expect_code, + expect_errcode=expect_errcode, + expect_additional_fields=expect_additional_fields, ) def knock( @@ -263,6 +268,7 @@ class RestHelper: appservice_user_id: Optional[str] = None, expect_code: int = HTTPStatus.OK, expect_errcode: Optional[str] = None, + expect_additional_fields: Optional[dict] = None, ) -> None: """ Send a membership state event into a room. 
@@ -323,6 +329,21 @@ class RestHelper: channel.result["body"], ) + if expect_additional_fields is not None: + for expect_key, expect_value in expect_additional_fields.items(): + assert expect_key in channel.json_body, "Expected field %s, got %s" % ( + expect_key, + channel.json_body, + ) + assert ( + channel.json_body[expect_key] == expect_value + ), "Expected: %s at %s, got: %s, resp: %s" % ( + expect_value, + expect_key, + channel.json_body[expect_key], + channel.json_body, + ) + self.auth_user_id = temp_id def send( diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index 1c67e1ca91..79727c430f 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -23,11 +23,13 @@ from urllib import parse import attr from parameterized import parameterized, parameterized_class from PIL import Image as Image +from typing_extensions import Literal from twisted.internet import defer from twisted.internet.defer import Deferred from twisted.test.proto_helpers import MemoryReactor +from synapse.api.errors import Codes from synapse.events import EventBase from synapse.events.spamcheck import load_legacy_spam_checkers from synapse.logging.context import make_deferred_yieldable @@ -570,9 +572,11 @@ class MediaRepoTests(unittest.HomeserverTestCase): ) -class TestSpamChecker: +class TestSpamCheckerLegacy: """A spam checker module that rejects all media that includes the bytes `evil`. + + Uses the legacy Spam-Checker API. """ def __init__(self, config: Dict[str, Any], api: ModuleApi) -> None: @@ -613,7 +617,7 @@ class TestSpamChecker: return b"evil" in buf.getvalue() -class SpamCheckerTestCase(unittest.HomeserverTestCase): +class SpamCheckerTestCaseLegacy(unittest.HomeserverTestCase): servlets = [ login.register_servlets, admin.register_servlets, @@ -637,7 +641,8 @@ class SpamCheckerTestCase(unittest.HomeserverTestCase): { "spam_checker": [ { - "module": TestSpamChecker.__module__ + ".TestSpamChecker", + "module": TestSpamCheckerLegacy.__module__ + + ".TestSpamCheckerLegacy", "config": {}, } ] @@ -662,3 +667,62 @@ class SpamCheckerTestCase(unittest.HomeserverTestCase): self.helper.upload_media( self.upload_resource, data, tok=self.tok, expect_code=400 ) + + +EVIL_DATA = b"Some evil data" +EVIL_DATA_EXPERIMENT = b"Some evil data to trigger the experimental tuple API" + + +class SpamCheckerTestCase(unittest.HomeserverTestCase): + servlets = [ + login.register_servlets, + admin.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.user = self.register_user("user", "pass") + self.tok = self.login("user", "pass") + + # Allow for uploading and downloading to/from the media repo + self.media_repo = hs.get_media_repository_resource() + self.download_resource = self.media_repo.children[b"download"] + self.upload_resource = self.media_repo.children[b"upload"] + + hs.get_module_api().register_spam_checker_callbacks( + check_media_file_for_spam=self.check_media_file_for_spam + ) + + async def check_media_file_for_spam( + self, file_wrapper: ReadableFileWrapper, file_info: FileInfo + ) -> Union[Codes, Literal["NOT_SPAM"]]: + buf = BytesIO() + await file_wrapper.write_chunks_to(buf.write) + + if buf.getvalue() == EVIL_DATA: + return Codes.FORBIDDEN + elif buf.getvalue() == EVIL_DATA_EXPERIMENT: + return (Codes.FORBIDDEN, {}) + else: + return "NOT_SPAM" + + def test_upload_innocent(self) -> None: + """Attempt to upload some innocent data that should be allowed.""" + 
self.helper.upload_media( + self.upload_resource, SMALL_PNG, tok=self.tok, expect_code=200 + ) + + def test_upload_ban(self) -> None: + """Attempt to upload some data that includes bytes "evil", which should + get rejected by the spam checker. + """ + + self.helper.upload_media( + self.upload_resource, EVIL_DATA, tok=self.tok, expect_code=400 + ) + + self.helper.upload_media( + self.upload_resource, + EVIL_DATA_EXPERIMENT, + tok=self.tok, + expect_code=400, + ) From 92202ce8670b3025bf7798831cdd5f21efa280d5 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Mon, 11 Jul 2022 19:00:12 +0200 Subject: [PATCH 097/178] Reduce event lookups during room creation by passing known event IDs (#13210) Inspired by the room batch handler, this uses previous event inserts to pre-populate prev events during room creation, reducing the number of queries required to create a room. Signed off by Nick @ Beeper (@Fizzadar) --- changelog.d/13210.misc | 1 + synapse/handlers/room.py | 18 ++++++++++++++++-- tests/rest/client/test_rooms.py | 15 +++++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13210.misc diff --git a/changelog.d/13210.misc b/changelog.d/13210.misc new file mode 100644 index 0000000000..407791b8e5 --- /dev/null +++ b/changelog.d/13210.misc @@ -0,0 +1 @@ +Reduce number of events queried during room creation. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 8dd94cbc76..a54f163c0a 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1019,6 +1019,8 @@ class RoomCreationHandler: event_keys = {"room_id": room_id, "sender": creator_id, "state_key": ""} + last_sent_event_id: Optional[str] = None + def create(etype: str, content: JsonDict, **kwargs: Any) -> JsonDict: e = {"type": etype, "content": content} @@ -1028,19 +1030,27 @@ class RoomCreationHandler: return e async def send(etype: str, content: JsonDict, **kwargs: Any) -> int: + nonlocal last_sent_event_id + event = create(etype, content, **kwargs) logger.debug("Sending %s in new room", etype) # Allow these events to be sent even if the user is shadow-banned to # allow the room creation to complete. ( - _, + sent_event, last_stream_id, ) = await self.event_creation_handler.create_and_send_nonmember_event( creator, event, ratelimit=False, ignore_shadow_ban=True, + # Note: we don't pass state_event_ids here because this triggers + # an additional query per event to look them up from the events table. + prev_event_ids=[last_sent_event_id] if last_sent_event_id else [], ) + + last_sent_event_id = sent_event.event_id + return last_stream_id try: @@ -1054,7 +1064,9 @@ class RoomCreationHandler: await send(etype=EventTypes.Create, content=creation_content) logger.debug("Sending %s in new room", EventTypes.Member) - await self.room_member_handler.update_membership( + # Room create event must exist at this point + assert last_sent_event_id is not None + member_event_id, _ = await self.room_member_handler.update_membership( creator, creator.user, room_id, @@ -1062,7 +1074,9 @@ class RoomCreationHandler: ratelimit=ratelimit, content=creator_join_profile, new_room=True, + prev_event_ids=[last_sent_event_id], ) + last_sent_event_id = member_event_id # We treat the power levels override specially as this needs to be one # of the first events that get sent into a room. 
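The change above chains each event sent during room creation off the previous one: the ID of the last event sent is passed as the sole `prev_event_ids` entry for the next `create_and_send_nonmember_event` (and for the creator's `update_membership` call), so the send path no longer has to look up prev events from the database for every bootstrap event. A minimal sketch of the pattern, with a stand-in `send_event` callable rather than Synapse's real API:

```python
from typing import Awaitable, Callable, List, Optional

async def send_bootstrap_events(
    send_event: Callable[[dict, List[str]], Awaitable[str]],
    events: List[dict],
) -> Optional[str]:
    """Send room-bootstrap events in order, chaining each one off the
    previously sent event instead of querying for prev events each time."""
    last_event_id: Optional[str] = None
    for event in events:
        # The first event (m.room.create) has nothing to point at yet;
        # every later event uses the one we just sent as its prev event.
        prev_event_ids = [last_event_id] if last_event_id is not None else []
        last_event_id = await send_event(event, prev_event_ids)
    return last_event_id
```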
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index e67844cfa1..d19b1bb858 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -708,6 +708,21 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(200, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) + assert channel.resource_usage is not None + self.assertEqual(33, channel.resource_usage.db_txn_count) + + def test_post_room_initial_state(self) -> None: + # POST with initial_state config key, expect new room id + channel = self.make_request( + "POST", + "/createRoom", + b'{"initial_state":[{"type": "m.bridge", "content": {}}]}', + ) + + self.assertEqual(200, channel.code, channel.result) + self.assertTrue("room_id" in channel.json_body) + assert channel.resource_usage is not None + self.assertEqual(37, channel.resource_usage.db_txn_count) def test_post_room_visibility_key(self) -> None: # POST with visibility config key, expect new room id From bc8eefc1e144eaeda4cb3f8171135ba03b94f2b4 Mon Sep 17 00:00:00 2001 From: villepeh <100730729+villepeh@users.noreply.github.com> Date: Mon, 11 Jul 2022 20:33:53 +0300 Subject: [PATCH 098/178] Add a sample bash script to docs for creating multiple worker files (#13032) Signed-off-by: Ville Petteri Huh. --- changelog.d/13032.doc | 1 + .../create-multiple-workers.md | 31 +++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 changelog.d/13032.doc create mode 100644 contrib/workers-bash-scripts/create-multiple-workers.md diff --git a/changelog.d/13032.doc b/changelog.d/13032.doc new file mode 100644 index 0000000000..54d45ecd0d --- /dev/null +++ b/changelog.d/13032.doc @@ -0,0 +1 @@ +Add a helpful example bash script to the contrib directory for creating multiple worker configuration files of the same type. Contributed by @villepeh. diff --git a/contrib/workers-bash-scripts/create-multiple-workers.md b/contrib/workers-bash-scripts/create-multiple-workers.md new file mode 100644 index 0000000000..ad5142fe15 --- /dev/null +++ b/contrib/workers-bash-scripts/create-multiple-workers.md @@ -0,0 +1,31 @@ +# Creating multiple workers with a bash script + +Setting up multiple worker configuration files manually can be time-consuming. +You can alternatively create multiple worker configuration files with a simple `bash` script. For example: + +```sh +#!/bin/bash +for i in {1..5} +do +cat << EOF >> generic_worker$i.yaml +worker_app: synapse.app.generic_worker +worker_name: generic_worker$i + +# The replication listener on the main synapse process. +worker_replication_host: 127.0.0.1 +worker_replication_http_port: 9093 + +worker_listeners: + - type: http + port: 808$i + resources: + - names: [client, federation] + +worker_log_config: /etc/matrix-synapse/generic-worker-log.yaml +EOF +done +``` + +This would create five generic workers with a unique `worker_name` field in each file and listening on ports 8081-8085. + +Customise the script to your needs. 
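The generated files only differ in `worker_name` and the listener port, so any templating approach works. A rough Python equivalent of the bash loop above, for illustration only (ports, file names and the log config path are copied from the example and should be adapted to your deployment):

```python
# Illustrative Python version of the bash loop above: write five generic
# worker configs that differ only in worker_name and listener port.
TEMPLATE = """\
worker_app: synapse.app.generic_worker
worker_name: generic_worker{i}

# The replication listener on the main synapse process.
worker_replication_host: 127.0.0.1
worker_replication_http_port: 9093

worker_listeners:
  - type: http
    port: 808{i}
    resources:
      - names: [client, federation]

worker_log_config: /etc/matrix-synapse/generic-worker-log.yaml
"""

for i in range(1, 6):
    with open(f"generic_worker{i}.yaml", "w") as f:
        f.write(TEMPLATE.format(i=i))
```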
From e5716b631c6fe0b0a8510f16a5bffddb6396f434 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 11 Jul 2022 21:08:39 +0100 Subject: [PATCH 099/178] Don't pull out the full state when calculating push actions (#13078) --- changelog.d/13078.misc | 1 + changelog.d/13222.misc | 2 +- synapse/push/bulk_push_rule_evaluator.py | 394 +++---------------- synapse/storage/_base.py | 9 + synapse/storage/databases/main/events.py | 12 + synapse/storage/databases/main/roommember.py | 86 ++++ tests/rest/client/test_rooms.py | 4 +- 7 files changed, 164 insertions(+), 344 deletions(-) create mode 100644 changelog.d/13078.misc diff --git a/changelog.d/13078.misc b/changelog.d/13078.misc new file mode 100644 index 0000000000..3835e97ad9 --- /dev/null +++ b/changelog.d/13078.misc @@ -0,0 +1 @@ +Reduce memory consumption when processing incoming events in large rooms. diff --git a/changelog.d/13222.misc b/changelog.d/13222.misc index 0bab1aed70..3835e97ad9 100644 --- a/changelog.d/13222.misc +++ b/changelog.d/13222.misc @@ -1 +1 @@ -Improve memory usage of calculating push actions for events in large rooms. +Reduce memory consumption when processing incoming events in large rooms. diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 7791b289e2..e581af9a9a 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -17,7 +17,6 @@ import itertools import logging from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Tuple, Union -import attr from prometheus_client import Counter from synapse.api.constants import EventTypes, Membership, RelationTypes @@ -26,13 +25,11 @@ from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext from synapse.state import POWER_KEY from synapse.storage.databases.main.roommember import EventIdMembership -from synapse.util.async_helpers import Linearizer -from synapse.util.caches import CacheMetric, register_cache -from synapse.util.caches.descriptors import lru_cache -from synapse.util.caches.lrucache import LruCache +from synapse.storage.state import StateFilter +from synapse.util.caches import register_cache from synapse.util.metrics import measure_func +from synapse.visibility import filter_event_for_clients_with_state -from ..storage.state import StateFilter from .push_rule_evaluator import PushRuleEvaluatorForEvent if TYPE_CHECKING: @@ -48,15 +45,6 @@ push_rules_state_size_counter = Counter( "synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter", "" ) -# Measures whether we use the fast path of using state deltas, or if we have to -# recalculate from scratch -push_rules_delta_state_cache_metric = register_cache( - "cache", - "push_rules_delta_state_cache_metric", - cache=[], # Meaningless size, as this isn't a cache that stores values - resizable=False, -) - STATE_EVENT_TYPES_TO_MARK_UNREAD = { EventTypes.Topic, @@ -111,10 +99,6 @@ class BulkPushRuleEvaluator: self.clock = hs.get_clock() self._event_auth_handler = hs.get_event_auth_handler() - # Used by `RulesForRoom` to ensure only one thing mutates the cache at a - # time. Keyed off room_id. 
- self._rules_linearizer = Linearizer(name="rules_for_room") - self.room_push_rule_cache_metrics = register_cache( "cache", "room_push_rule_cache", @@ -126,48 +110,48 @@ class BulkPushRuleEvaluator: self._relations_match_enabled = self.hs.config.experimental.msc3772_enabled async def _get_rules_for_event( - self, event: EventBase, context: EventContext + self, + event: EventBase, ) -> Dict[str, List[Dict[str, Any]]]: - """This gets the rules for all users in the room at the time of the event, - as well as the push rules for the invitee if the event is an invite. + """Get the push rules for all users who may need to be notified about + the event. + + Note: this does not check if the user is allowed to see the event. Returns: - dict of user_id -> push_rules + Mapping of user ID to their push rules. """ - room_id = event.room_id + # We get the users who may need to be notified by first fetching the + # local users currently in the room, finding those that have push rules, + # and *then* checking which users are actually allowed to see the event. + # + # The alternative is to first fetch all users that were joined at the + # event, but that requires fetching the full state at the event, which + # may be expensive for large rooms with few local users. - rules_for_room_data = self._get_rules_for_room(room_id) - rules_for_room = RulesForRoom( - hs=self.hs, - room_id=room_id, - rules_for_room_cache=self._get_rules_for_room.cache, - room_push_rule_cache_metrics=self.room_push_rule_cache_metrics, - linearizer=self._rules_linearizer, - cached_data=rules_for_room_data, - ) - - rules_by_user = await rules_for_room.get_rules(event, context) + local_users = await self.store.get_local_users_in_room(event.room_id) # if this event is an invite event, we may need to run rules for the user # who's been invited, otherwise they won't get told they've been invited - if event.type == "m.room.member" and event.content["membership"] == "invite": + if event.type == EventTypes.Member and event.membership == Membership.INVITE: invited = event.state_key - if invited and self.hs.is_mine_id(invited): - rules_by_user = dict(rules_by_user) - rules_by_user[invited] = await self.store.get_push_rules_for_user( - invited - ) + if invited and self.hs.is_mine_id(invited) and invited not in local_users: + local_users = list(local_users) + local_users.append(invited) + + rules_by_user = await self.store.bulk_get_push_rules(local_users) + + logger.debug("Users in room: %s", local_users) + + if logger.isEnabledFor(logging.DEBUG): + logger.debug( + "Returning push rules for %r %r", + event.room_id, + list(rules_by_user.keys()), + ) return rules_by_user - @lru_cache() - def _get_rules_for_room(self, room_id: str) -> "RulesForRoomData": - """Get the current RulesForRoomData object for the given room id""" - # It's important that the RulesForRoomData object gets added to self._get_rules_for_room.cache - # before any lookup methods get called on it as otherwise there may be - # a race if invalidate_all gets called (which assumes its in the cache) - return RulesForRoomData() - async def _get_power_levels_and_sender_level( self, event: EventBase, context: EventContext ) -> Tuple[dict, int]: @@ -262,10 +246,12 @@ class BulkPushRuleEvaluator: count_as_unread = _should_count_as_unread(event, context) - rules_by_user = await self._get_rules_for_event(event, context) + rules_by_user = await self._get_rules_for_event(event) actions_by_user: Dict[str, List[Union[dict, str]]] = {} - room_members = await 
self.store.get_joined_users_from_context(event, context) + room_member_count = await self.store.get_number_joined_users_in_room( + event.room_id + ) ( power_levels, @@ -278,30 +264,36 @@ class BulkPushRuleEvaluator: evaluator = PushRuleEvaluatorForEvent( event, - len(room_members), + room_member_count, sender_power_level, power_levels, relations, self._relations_match_enabled, ) - # If the event is not a state event check if any users ignore the sender. - if not event.is_state(): - ignorers = await self.store.ignored_by(event.sender) - else: - ignorers = frozenset() + users = rules_by_user.keys() + profiles = await self.store.get_subset_users_in_room_with_profiles( + event.room_id, users + ) + + # This is a check for the case where user joins a room without being + # allowed to see history, and then the server receives a delayed event + # from before the user joined, which they should not be pushed for + uids_with_visibility = await filter_event_for_clients_with_state( + self.store, users, event, context + ) for uid, rules in rules_by_user.items(): if event.sender == uid: continue - if uid in ignorers: + if uid not in uids_with_visibility: continue display_name = None - profile_info = room_members.get(uid) - if profile_info: - display_name = profile_info.display_name + profile = profiles.get(uid) + if profile: + display_name = profile.display_name if not display_name: # Handle the case where we are pushing a membership event to @@ -346,283 +338,3 @@ MemberMap = Dict[str, Optional[EventIdMembership]] Rule = Dict[str, dict] RulesByUser = Dict[str, List[Rule]] StateGroup = Union[object, int] - - -@attr.s(slots=True, auto_attribs=True) -class RulesForRoomData: - """The data stored in the cache by `RulesForRoom`. - - We don't store `RulesForRoom` directly in the cache as we want our caches to - *only* include data, and not references to e.g. the data stores. - """ - - # event_id -> EventIdMembership - member_map: MemberMap = attr.Factory(dict) - # user_id -> rules - rules_by_user: RulesByUser = attr.Factory(dict) - - # The last state group we updated the caches for. If the state_group of - # a new event comes along, we know that we can just return the cached - # result. - # On invalidation of the rules themselves (if the user changes them), - # we invalidate everything and set state_group to `object()` - state_group: StateGroup = attr.Factory(object) - - # A sequence number to keep track of when we're allowed to update the - # cache. We bump the sequence number when we invalidate the cache. If - # the sequence number changes while we're calculating stuff we should - # not update the cache with it. - sequence: int = 0 - - # A cache of user_ids that we *know* aren't interesting, e.g. user_ids - # owned by AS's, or remote users, etc. (I.e. users we will never need to - # calculate push for) - # These never need to be invalidated as we will never set up push for - # them. - uninteresting_user_set: Set[str] = attr.Factory(set) - - -class RulesForRoom: - """Caches push rules for users in a room. - - This efficiently handles users joining/leaving the room by not invalidating - the entire cache for the room. - - A new instance is constructed for each call to - `BulkPushRuleEvaluator._get_rules_for_event`, with the cached data from - previous calls passed in. 
- """ - - def __init__( - self, - hs: "HomeServer", - room_id: str, - rules_for_room_cache: LruCache, - room_push_rule_cache_metrics: CacheMetric, - linearizer: Linearizer, - cached_data: RulesForRoomData, - ): - """ - Args: - hs: The HomeServer object. - room_id: The room ID. - rules_for_room_cache: The cache object that caches these - RoomsForUser objects. - room_push_rule_cache_metrics: The metrics object - linearizer: The linearizer used to ensure only one thing mutates - the cache at a time. Keyed off room_id - cached_data: Cached data from previous calls to `self.get_rules`, - can be mutated. - """ - self.room_id = room_id - self.is_mine_id = hs.is_mine_id - self.store = hs.get_datastores().main - self.room_push_rule_cache_metrics = room_push_rule_cache_metrics - - # Used to ensure only one thing mutates the cache at a time. Keyed off - # room_id. - self.linearizer = linearizer - - self.data = cached_data - - # We need to be clever on the invalidating caches callbacks, as - # otherwise the invalidation callback holds a reference to the object, - # potentially causing it to leak. - # To get around this we pass a function that on invalidations looks ups - # the RoomsForUser entry in the cache, rather than keeping a reference - # to self around in the callback. - self.invalidate_all_cb = _Invalidation(rules_for_room_cache, room_id) - - async def get_rules( - self, event: EventBase, context: EventContext - ) -> Dict[str, List[Dict[str, dict]]]: - """Given an event context return the rules for all users who are - currently in the room. - """ - state_group = context.state_group - - if state_group and self.data.state_group == state_group: - logger.debug("Using cached rules for %r", self.room_id) - self.room_push_rule_cache_metrics.inc_hits() - return self.data.rules_by_user - - async with self.linearizer.queue(self.room_id): - if state_group and self.data.state_group == state_group: - logger.debug("Using cached rules for %r", self.room_id) - self.room_push_rule_cache_metrics.inc_hits() - return self.data.rules_by_user - - self.room_push_rule_cache_metrics.inc_misses() - - ret_rules_by_user = {} - missing_member_event_ids = {} - if state_group and self.data.state_group == context.prev_group: - # If we have a simple delta then we can reuse most of the previous - # results. - ret_rules_by_user = self.data.rules_by_user - current_state_ids = context.delta_ids - - push_rules_delta_state_cache_metric.inc_hits() - else: - current_state_ids = await context.get_current_state_ids() - push_rules_delta_state_cache_metric.inc_misses() - # Ensure the state IDs exist. 
- assert current_state_ids is not None - - push_rules_state_size_counter.inc(len(current_state_ids)) - - logger.debug( - "Looking for member changes in %r %r", state_group, current_state_ids - ) - - # Loop through to see which member events we've seen and have rules - # for and which we need to fetch - for key in current_state_ids: - typ, user_id = key - if typ != EventTypes.Member: - continue - - if user_id in self.data.uninteresting_user_set: - continue - - if not self.is_mine_id(user_id): - self.data.uninteresting_user_set.add(user_id) - continue - - if self.store.get_if_app_services_interested_in_user(user_id): - self.data.uninteresting_user_set.add(user_id) - continue - - event_id = current_state_ids[key] - - res = self.data.member_map.get(event_id, None) - if res: - if res.membership == Membership.JOIN: - rules = self.data.rules_by_user.get(res.user_id, None) - if rules: - ret_rules_by_user[res.user_id] = rules - continue - - # If a user has left a room we remove their push rule. If they - # joined then we re-add it later in _update_rules_with_member_event_ids - ret_rules_by_user.pop(user_id, None) - missing_member_event_ids[user_id] = event_id - - if missing_member_event_ids: - # If we have some member events we haven't seen, look them up - # and fetch push rules for them if appropriate. - logger.debug("Found new member events %r", missing_member_event_ids) - await self._update_rules_with_member_event_ids( - ret_rules_by_user, missing_member_event_ids, state_group, event - ) - else: - # The push rules didn't change but lets update the cache anyway - self.update_cache( - self.data.sequence, - members={}, # There were no membership changes - rules_by_user=ret_rules_by_user, - state_group=state_group, - ) - - if logger.isEnabledFor(logging.DEBUG): - logger.debug( - "Returning push rules for %r %r", self.room_id, ret_rules_by_user.keys() - ) - return ret_rules_by_user - - async def _update_rules_with_member_event_ids( - self, - ret_rules_by_user: Dict[str, list], - member_event_ids: Dict[str, str], - state_group: Optional[int], - event: EventBase, - ) -> None: - """Update the partially filled rules_by_user dict by fetching rules for - any newly joined users in the `member_event_ids` list. - - Args: - ret_rules_by_user: Partially filled dict of push rules. Gets - updated with any new rules. - member_event_ids: Dict of user id to event id for membership events - that have happened since the last time we filled rules_by_user - state_group: The state group we are currently computing push rules - for. Used when updating the cache. - event: The event we are currently computing push rules for. - """ - sequence = self.data.sequence - - members = await self.store.get_membership_from_event_ids( - member_event_ids.values() - ) - - # If the event is a join event then it will be in current state events - # map but not in the DB, so we have to explicitly insert it. - if event.type == EventTypes.Member: - for event_id in member_event_ids.values(): - if event_id == event.event_id: - members[event_id] = EventIdMembership( - user_id=event.state_key, membership=event.membership - ) - - if logger.isEnabledFor(logging.DEBUG): - logger.debug("Found members %r: %r", self.room_id, members.values()) - - joined_user_ids = { - entry.user_id - for entry in members.values() - if entry and entry.membership == Membership.JOIN - } - - logger.debug("Joined: %r", joined_user_ids) - - # Previously we only considered users with pushers or read receipts in that - # room. 
We can't do this anymore because we use push actions to calculate unread - # counts, which don't rely on the user having pushers or sent a read receipt into - # the room. Therefore we just need to filter for local users here. - user_ids = list(filter(self.is_mine_id, joined_user_ids)) - - rules_by_user = await self.store.bulk_get_push_rules( - user_ids, on_invalidate=self.invalidate_all_cb - ) - - ret_rules_by_user.update( - item for item in rules_by_user.items() if item[0] is not None - ) - - self.update_cache(sequence, members, ret_rules_by_user, state_group) - - def update_cache( - self, - sequence: int, - members: MemberMap, - rules_by_user: RulesByUser, - state_group: StateGroup, - ) -> None: - if sequence == self.data.sequence: - self.data.member_map.update(members) - self.data.rules_by_user = rules_by_user - self.data.state_group = state_group - - -@attr.attrs(slots=True, frozen=True, auto_attribs=True) -class _Invalidation: - # _Invalidation is passed as an `on_invalidate` callback to bulk_get_push_rules, - # which means that it it is stored on the bulk_get_push_rules cache entry. In order - # to ensure that we don't accumulate lots of redundant callbacks on the cache entry, - # we need to ensure that two _Invalidation objects are "equal" if they refer to the - # same `cache` and `room_id`. - # - # attrs provides suitable __hash__ and __eq__ methods, provided we remember to - # set `frozen=True`. - - cache: LruCache - room_id: str - - def __call__(self) -> None: - rules_data = self.cache.get(self.room_id, None, update_metrics=False) - if rules_data: - rules_data.sequence += 1 - rules_data.state_group = object() - rules_data.member_map = {} - rules_data.rules_by_user = {} - push_rules_invalidation_counter.inc() diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index abfc56b061..b8c8dcd76b 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -75,6 +75,15 @@ class SQLBaseStore(metaclass=ABCMeta): self._attempt_to_invalidate_cache( "get_users_in_room_with_profiles", (room_id,) ) + self._attempt_to_invalidate_cache( + "get_number_joined_users_in_room", (room_id,) + ) + self._attempt_to_invalidate_cache("get_local_users_in_room", (room_id,)) + + for user_id in members_changed: + self._attempt_to_invalidate_cache( + "get_user_in_room_with_profile", (room_id, user_id) + ) # Purge other caches based on room state. self._attempt_to_invalidate_cache("get_room_summary", (room_id,)) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 2ff3d21305..eb4efbb93c 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1797,6 +1797,18 @@ class PersistEventsStore: self.store.get_invited_rooms_for_local_user.invalidate, (event.state_key,), ) + txn.call_after( + self.store.get_local_users_in_room.invalidate, + (event.room_id,), + ) + txn.call_after( + self.store.get_number_joined_users_in_room.invalidate, + (event.room_id,), + ) + txn.call_after( + self.store.get_user_in_room_with_profile.invalidate, + (event.room_id, event.state_key), + ) # The `_get_membership_from_event_id` is immutable, except for the # case where we look up an event *before* persisting it. 
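Putting the new pieces together, push-rule evaluation now starts from the room's local members rather than from the full room state at the event. The following is a simplified sketch of that flow (it elides power levels, relations and the actual rule evaluation; the function name is illustrative and not part of Synapse, while the store accessors and visibility filter are the ones added or used in this patch):

```python
from synapse.api.constants import EventTypes, Membership
from synapse.visibility import filter_event_for_clients_with_state


async def rules_and_profiles_for_event(store, event, context):
    """Simplified sketch: work out whose push rules to run for an event."""
    # Start from the local members of the room -- no full state needed.
    local_users = list(await store.get_local_users_in_room(event.room_id))

    # An invited user should also have their rules run (the real code also
    # checks that the invitee is a local user).
    if event.type == EventTypes.Member and event.membership == Membership.INVITE:
        if event.state_key not in local_users:
            local_users.append(event.state_key)

    # Batch-fetch the push rules for those users.
    rules_by_user = await store.bulk_get_push_rules(local_users)

    # Profiles are only fetched for users that actually have rules.
    profiles = await store.get_subset_users_in_room_with_profiles(
        event.room_id, rules_by_user.keys()
    )

    # Drop users who may not see the event, e.g. a delayed event from before
    # they joined a room with restricted history visibility.
    visible = await filter_event_for_clients_with_state(
        store, rules_by_user.keys(), event, context
    )
    rules_by_user = {u: r for u, r in rules_by_user.items() if u in visible}

    return rules_by_user, profiles
```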
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 31bc8c5601..0b5e4e4254 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -212,6 +212,60 @@ class RoomMemberWorkerStore(EventsWorkerStore): txn.execute(sql, (room_id, Membership.JOIN)) return [r[0] for r in txn] + @cached() + def get_user_in_room_with_profile( + self, room_id: str, user_id: str + ) -> Dict[str, ProfileInfo]: + raise NotImplementedError() + + @cachedList( + cached_method_name="get_user_in_room_with_profile", list_name="user_ids" + ) + async def get_subset_users_in_room_with_profiles( + self, room_id: str, user_ids: Collection[str] + ) -> Dict[str, ProfileInfo]: + """Get a mapping from user ID to profile information for a list of users + in a given room. + + The profile information comes directly from this room's `m.room.member` + events, and so may be specific to this room rather than part of a user's + global profile. To avoid privacy leaks, the profile data should only be + revealed to users who are already in this room. + + Args: + room_id: The ID of the room to retrieve the users of. + user_ids: a list of users in the room to run the query for + + Returns: + A mapping from user ID to ProfileInfo. + """ + + def _get_subset_users_in_room_with_profiles( + txn: LoggingTransaction, + ) -> Dict[str, ProfileInfo]: + clause, ids = make_in_list_sql_clause( + self.database_engine, "m.user_id", user_ids + ) + + sql = """ + SELECT state_key, display_name, avatar_url FROM room_memberships as m + INNER JOIN current_state_events as c + ON m.event_id = c.event_id + AND m.room_id = c.room_id + AND m.user_id = c.state_key + WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ? AND %s + """ % ( + clause, + ) + txn.execute(sql, (room_id, Membership.JOIN, *ids)) + + return {r[0]: ProfileInfo(display_name=r[1], avatar_url=r[2]) for r in txn} + + return await self.db_pool.runInteraction( + "get_subset_users_in_room_with_profiles", + _get_subset_users_in_room_with_profiles, + ) + @cached(max_entries=100000, iterable=True) async def get_users_in_room_with_profiles( self, room_id: str @@ -337,6 +391,15 @@ class RoomMemberWorkerStore(EventsWorkerStore): "get_room_summary", _get_room_summary_txn ) + @cached() + async def get_number_joined_users_in_room(self, room_id: str) -> int: + return await self.db_pool.simple_select_one_onecol( + table="current_state_events", + keyvalues={"room_id": room_id, "membership": Membership.JOIN}, + retcol="COUNT(*)", + desc="get_number_joined_users_in_room", + ) + @cached() async def get_invited_rooms_for_local_user( self, user_id: str @@ -416,6 +479,17 @@ class RoomMemberWorkerStore(EventsWorkerStore): user_id: str, membership_list: List[str], ) -> List[RoomsForUser]: + """Get all the rooms for this *local* user where the membership for this user + matches one in the membership list. + + Args: + user_id: The user ID. + membership_list: A list of synapse.api.constants.Membership + values which the user must be in. + + Returns: + The RoomsForUser that the user matches the membership types. + """ # Paranoia check. if not self.hs.is_mine_id(user_id): raise Exception( @@ -444,6 +518,18 @@ class RoomMemberWorkerStore(EventsWorkerStore): return results + @cached(iterable=True) + async def get_local_users_in_room(self, room_id: str) -> List[str]: + """ + Retrieves a list of the current roommembers who are local to the server. 
+ """ + return await self.db_pool.simple_select_onecol( + table="local_current_membership", + keyvalues={"room_id": room_id, "membership": Membership.JOIN}, + retcol="user_id", + desc="get_local_users_in_room", + ) + async def get_local_current_membership_for_user_in_room( self, user_id: str, room_id: str ) -> Tuple[Optional[str], Optional[str]]: diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index d19b1bb858..df7ffbe545 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -709,7 +709,7 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(200, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(33, channel.resource_usage.db_txn_count) + self.assertEqual(37, channel.resource_usage.db_txn_count) def test_post_room_initial_state(self) -> None: # POST with initial_state config key, expect new room id @@ -722,7 +722,7 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(200, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(37, channel.resource_usage.db_txn_count) + self.assertEqual(41, channel.resource_usage.db_txn_count) def test_post_room_visibility_key(self) -> None: # POST with visibility config key, expect new room id From 6173d585df189a763256ed6dc4fcfb5aa26e5e5c Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Tue, 12 Jul 2022 11:26:25 +0100 Subject: [PATCH 100/178] 1.63.0rc1 --- CHANGES.md | 80 +++++++++++++++++++++++++++++++++++++++ changelog.d/13028.misc | 1 - changelog.d/13029.doc | 1 - changelog.d/13031.feature | 1 - changelog.d/13032.doc | 1 - changelog.d/13044.misc | 1 - changelog.d/13077.doc | 3 -- changelog.d/13078.misc | 1 - changelog.d/13079.misc | 1 - changelog.d/13086.doc | 1 - changelog.d/13100.misc | 1 - changelog.d/13103.misc | 1 - changelog.d/13113.misc | 1 - changelog.d/13116.doc | 1 - changelog.d/13119.misc | 1 - changelog.d/13125.feature | 1 - changelog.d/13127.misc | 1 - changelog.d/13129.misc | 1 - changelog.d/13131.bugfix | 1 - changelog.d/13132.doc | 1 - changelog.d/13134.misc | 1 - changelog.d/13135.misc | 1 - changelog.d/13136.misc | 1 - changelog.d/13139.doc | 1 - changelog.d/13143.misc | 1 - changelog.d/13144.misc | 1 - changelog.d/13145.misc | 1 - changelog.d/13148.feature | 1 - changelog.d/13151.misc | 1 - changelog.d/13152.misc | 1 - changelog.d/13153.misc | 1 - changelog.d/13157.misc | 1 - changelog.d/13158.misc | 1 - changelog.d/13159.misc | 1 - changelog.d/13166.doc | 1 - changelog.d/13167.misc | 1 - changelog.d/13174.bugfix | 1 - changelog.d/13194.bugfix | 1 - changelog.d/13195.misc | 1 - changelog.d/13197.bugfix | 1 - changelog.d/13200.removal | 1 - changelog.d/13207.docker | 1 - changelog.d/13209.misc | 1 - changelog.d/13210.misc | 1 - changelog.d/13211.misc | 1 - changelog.d/13212.doc | 1 - changelog.d/13222.misc | 1 - changelog.d/13223.bugfix | 1 - changelog.d/13226.bugfix | 1 - changelog.d/13228.misc | 1 - changelog.d/13235.bugfix | 1 - changelog.d/13236.bugfix | 1 - debian/changelog | 6 +++ pyproject.toml | 2 +- 54 files changed, 87 insertions(+), 54 deletions(-) delete mode 100644 changelog.d/13028.misc delete mode 100644 changelog.d/13029.doc delete mode 100644 changelog.d/13031.feature delete mode 100644 changelog.d/13032.doc delete mode 100644 changelog.d/13044.misc delete mode 100644 changelog.d/13077.doc delete mode 100644 changelog.d/13078.misc delete mode 100644 changelog.d/13079.misc delete mode 
100644 changelog.d/13086.doc delete mode 100644 changelog.d/13100.misc delete mode 100644 changelog.d/13103.misc delete mode 100644 changelog.d/13113.misc delete mode 100644 changelog.d/13116.doc delete mode 100644 changelog.d/13119.misc delete mode 100644 changelog.d/13125.feature delete mode 100644 changelog.d/13127.misc delete mode 100644 changelog.d/13129.misc delete mode 100644 changelog.d/13131.bugfix delete mode 100644 changelog.d/13132.doc delete mode 100644 changelog.d/13134.misc delete mode 100644 changelog.d/13135.misc delete mode 100644 changelog.d/13136.misc delete mode 100644 changelog.d/13139.doc delete mode 100644 changelog.d/13143.misc delete mode 100644 changelog.d/13144.misc delete mode 100644 changelog.d/13145.misc delete mode 100644 changelog.d/13148.feature delete mode 100644 changelog.d/13151.misc delete mode 100644 changelog.d/13152.misc delete mode 100644 changelog.d/13153.misc delete mode 100644 changelog.d/13157.misc delete mode 100644 changelog.d/13158.misc delete mode 100644 changelog.d/13159.misc delete mode 100644 changelog.d/13166.doc delete mode 100644 changelog.d/13167.misc delete mode 100644 changelog.d/13174.bugfix delete mode 100644 changelog.d/13194.bugfix delete mode 100644 changelog.d/13195.misc delete mode 100644 changelog.d/13197.bugfix delete mode 100644 changelog.d/13200.removal delete mode 100644 changelog.d/13207.docker delete mode 100644 changelog.d/13209.misc delete mode 100644 changelog.d/13210.misc delete mode 100644 changelog.d/13211.misc delete mode 100644 changelog.d/13212.doc delete mode 100644 changelog.d/13222.misc delete mode 100644 changelog.d/13223.bugfix delete mode 100644 changelog.d/13226.bugfix delete mode 100644 changelog.d/13228.misc delete mode 100644 changelog.d/13235.bugfix delete mode 100644 changelog.d/13236.bugfix diff --git a/CHANGES.md b/CHANGES.md index ec27cda1b2..ee2f90632f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,83 @@ +Synapse 1.63.0rc1 (2022-07-12) +============================== + +Features +-------- + +- Implement [MSC3827](https://github.com/matrix-org/matrix-spec-proposals/pull/3827): Filtering of /publicRooms by room type. ([\#13031](https://github.com/matrix-org/synapse/issues/13031)) +- Add a rate limit for local users sending invites. ([\#13125](https://github.com/matrix-org/synapse/issues/13125)) +- Improve validation logic in Synapse's REST endpoints. ([\#13148](https://github.com/matrix-org/synapse/issues/13148)) + + +Bugfixes +-------- + +- Fix application service not being able to join remote federated room without a profile set. ([\#13131](https://github.com/matrix-org/synapse/issues/13131)) +- Make use of the more robust `get_current_state` in `_get_state_map_for_room` to avoid breakages. ([\#13174](https://github.com/matrix-org/synapse/issues/13174)) +- Fix bug where rows were not deleted from `event_push_actions` table on large servers. Introduced in v1.62.0. ([\#13194](https://github.com/matrix-org/synapse/issues/13194)) +- Fix exception when using experimental [MSC3030](https://github.com/matrix-org/matrix-spec-proposals/pull/3030) `/timestamp_to_event` endpoint to look for remote federated imported events before room creation. ([\#13197](https://github.com/matrix-org/synapse/issues/13197)) +- Fix bug where notification counts would get stuck after a highlighted message. Broke in v1.62.0. ([\#13223](https://github.com/matrix-org/synapse/issues/13223)) +- Fix a long-standing bug where the `synapse_port_db` script could fail to copy rows with negative row ids. 
([\#13226](https://github.com/matrix-org/synapse/issues/13226)) +- Fix MSC3202-enabled appservices not receiving to-device messages, preventing messages from being decrypted. ([\#13235](https://github.com/matrix-org/synapse/issues/13235)) +- Fix appservices not receiving room-less EDUs, like presence, if enabled. ([\#13236](https://github.com/matrix-org/synapse/issues/13236)) + + +Updates to the Docker image +--------------------------- + +- Bump the version of `lxml` in matrix.org Docker images Debian packages from 4.8.0 to 4.9.1. ([\#13207](https://github.com/matrix-org/synapse/issues/13207)) + + +Improved Documentation +---------------------- + +- Add an explanation of the `--report-stats` argument to the docs. ([\#13029](https://github.com/matrix-org/synapse/issues/13029)) +- Add a helpful example bash script to the contrib directory for creating multiple worker configuration files of the same type. Contributed by @villepeh. ([\#13032](https://github.com/matrix-org/synapse/issues/13032)) +- Clean up references to sample configuration and redirect users to the configuration manual instead. ([\#13077](https://github.com/matrix-org/synapse/issues/13077)) +- Add documentation for anonymised homeserver statistics collection. ([\#13086](https://github.com/matrix-org/synapse/issues/13086)) +- Fix wrong section header for `allow_public_rooms_over_federation` in the homeserver config documentation. ([\#13116](https://github.com/matrix-org/synapse/issues/13116)) +- Document how the Synapse team does reviews. ([\#13132](https://github.com/matrix-org/synapse/issues/13132)) +- Add a link to the configuration manual from the homeserver sample config documentation. ([\#13139](https://github.com/matrix-org/synapse/issues/13139)) +- Add missing links to config options. ([\#13166](https://github.com/matrix-org/synapse/issues/13166)) +- Add documentation for the existing `databases` option in the homeserver configuration manual. ([\#13212](https://github.com/matrix-org/synapse/issues/13212)) + + +Deprecations and Removals +------------------------- + +- Remove obsolete and for 8 years unused `RoomEventsStoreTestCase`. Contributed by @arkamar. ([\#13200](https://github.com/matrix-org/synapse/issues/13200)) + + +Internal Changes +---------------- + +- Add type annotations to `tests.utils`. ([\#13028](https://github.com/matrix-org/synapse/issues/13028)) +- Support temporary experimental return values for spam checker module callbacks. ([\#13044](https://github.com/matrix-org/synapse/issues/13044)) +- Reduce memory consumption when processing incoming events in large rooms. ([\#13078](https://github.com/matrix-org/synapse/issues/13078), [\#13222](https://github.com/matrix-org/synapse/issues/13222)) +- Enable Complement testing in the 'Twisted Trunk' CI runs. ([\#13079](https://github.com/matrix-org/synapse/issues/13079), [\#13157](https://github.com/matrix-org/synapse/issues/13157)) +- Faster room joins: Handle race between persisting an event and un-partial stating a room. ([\#13100](https://github.com/matrix-org/synapse/issues/13100)) +- Add missing type hints to `synapse.logging`. ([\#13103](https://github.com/matrix-org/synapse/issues/13103)) +- Raise a `DependencyError` on missing dependencies instead of a `ConfigError`. ([\#13113](https://github.com/matrix-org/synapse/issues/13113)) +- Reduce DB usage of `/sync` when a large number of unread messages have recently been sent in a room. 
([\#13119](https://github.com/matrix-org/synapse/issues/13119), [\#13153](https://github.com/matrix-org/synapse/issues/13153)) +- Improve startup times in Complement test runs against workers, particularly in CPU-constrained environments. ([\#13127](https://github.com/matrix-org/synapse/issues/13127)) +- Only one-line SQL statements for logging and tracing. ([\#13129](https://github.com/matrix-org/synapse/issues/13129)) +- Apply ratelimiting earlier in processing of /send request. ([\#13134](https://github.com/matrix-org/synapse/issues/13134)) +- Enforce type annotations for `tests.test_server`. ([\#13135](https://github.com/matrix-org/synapse/issues/13135)) +- Add type annotations to `tests.server`. ([\#13136](https://github.com/matrix-org/synapse/issues/13136)) +- Add support to `complement.sh` for skipping the docker build. ([\#13143](https://github.com/matrix-org/synapse/issues/13143), [\#13158](https://github.com/matrix-org/synapse/issues/13158)) +- Faster joins: skip waiting for full state when processing incoming events over federation. ([\#13144](https://github.com/matrix-org/synapse/issues/13144)) +- Improve exception handling when processing events received over federation. ([\#13145](https://github.com/matrix-org/synapse/issues/13145)) +- Faster room joins: fix race in recalculation of current room state. ([\#13151](https://github.com/matrix-org/synapse/issues/13151)) +- Add the ability to set the log level using the `SYNAPSE_TEST_LOG_LEVEL` environment when using `complement.sh`. ([\#13152](https://github.com/matrix-org/synapse/issues/13152)) +- Improve and fix type hints. ([\#13159](https://github.com/matrix-org/synapse/issues/13159)) +- Update config used by Complement to allow device name lookup over federation. ([\#13167](https://github.com/matrix-org/synapse/issues/13167)) +- Check that `auto_vacuum` is disabled when porting a SQLite database to Postgres, as `VACUUM`s must not be performed between runs of the script. ([\#13195](https://github.com/matrix-org/synapse/issues/13195)) +- Reduce number of queries used to get profile information. Contributed by Nick @ Beeper (@fizzadar). ([\#13209](https://github.com/matrix-org/synapse/issues/13209)) +- Reduce number of events queried during room creation. Contributed by Nick @ Beeper (@fizzadar). ([\#13210](https://github.com/matrix-org/synapse/issues/13210)) +- More aggressively rotate push actions. ([\#13211](https://github.com/matrix-org/synapse/issues/13211)) +- Add `max_line_length` setting for Python files to the `.editorconfig`. Contributed by @sumnerevans @ Beeper. ([\#13228](https://github.com/matrix-org/synapse/issues/13228)) + + Synapse 1.62.0 (2022-07-05) =========================== diff --git a/changelog.d/13028.misc b/changelog.d/13028.misc deleted file mode 100644 index 4e5f3d8f91..0000000000 --- a/changelog.d/13028.misc +++ /dev/null @@ -1 +0,0 @@ -Add type annotations to `tests.utils`. diff --git a/changelog.d/13029.doc b/changelog.d/13029.doc deleted file mode 100644 index d398f0fdbe..0000000000 --- a/changelog.d/13029.doc +++ /dev/null @@ -1 +0,0 @@ -Add an explanation of the `--report-stats` argument to the docs. diff --git a/changelog.d/13031.feature b/changelog.d/13031.feature deleted file mode 100644 index fee8e9d1ff..0000000000 --- a/changelog.d/13031.feature +++ /dev/null @@ -1 +0,0 @@ -Implement [MSC3827](https://github.com/matrix-org/matrix-spec-proposals/pull/3827): Filtering of /publicRooms by room type. 
diff --git a/changelog.d/13032.doc b/changelog.d/13032.doc deleted file mode 100644 index 54d45ecd0d..0000000000 --- a/changelog.d/13032.doc +++ /dev/null @@ -1 +0,0 @@ -Add a helpful example bash script to the contrib directory for creating multiple worker configuration files of the same type. Contributed by @villepeh. diff --git a/changelog.d/13044.misc b/changelog.d/13044.misc deleted file mode 100644 index f9a0669dd3..0000000000 --- a/changelog.d/13044.misc +++ /dev/null @@ -1 +0,0 @@ -Support temporary experimental return values for spam checker module callbacks. \ No newline at end of file diff --git a/changelog.d/13077.doc b/changelog.d/13077.doc deleted file mode 100644 index 502f2d059e..0000000000 --- a/changelog.d/13077.doc +++ /dev/null @@ -1,3 +0,0 @@ -Clean up references to sample configuration and redirect users to the configuration manual instead. - - diff --git a/changelog.d/13078.misc b/changelog.d/13078.misc deleted file mode 100644 index 3835e97ad9..0000000000 --- a/changelog.d/13078.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce memory consumption when processing incoming events in large rooms. diff --git a/changelog.d/13079.misc b/changelog.d/13079.misc deleted file mode 100644 index 0133097c83..0000000000 --- a/changelog.d/13079.misc +++ /dev/null @@ -1 +0,0 @@ -Enable Complement testing in the 'Twisted Trunk' CI runs. \ No newline at end of file diff --git a/changelog.d/13086.doc b/changelog.d/13086.doc deleted file mode 100644 index a3960ca325..0000000000 --- a/changelog.d/13086.doc +++ /dev/null @@ -1 +0,0 @@ -Add documentation for anonymised homeserver statistics collection. \ No newline at end of file diff --git a/changelog.d/13100.misc b/changelog.d/13100.misc deleted file mode 100644 index 28f2fe0349..0000000000 --- a/changelog.d/13100.misc +++ /dev/null @@ -1 +0,0 @@ -Faster room joins: Handle race between persisting an event and un-partial stating a room. diff --git a/changelog.d/13103.misc b/changelog.d/13103.misc deleted file mode 100644 index 4de5f9e905..0000000000 --- a/changelog.d/13103.misc +++ /dev/null @@ -1 +0,0 @@ -Add missing type hints to `synapse.logging`. diff --git a/changelog.d/13113.misc b/changelog.d/13113.misc deleted file mode 100644 index 7b1a50eec0..0000000000 --- a/changelog.d/13113.misc +++ /dev/null @@ -1 +0,0 @@ -Raise a `DependencyError` on missing dependencies instead of a `ConfigError`. \ No newline at end of file diff --git a/changelog.d/13116.doc b/changelog.d/13116.doc deleted file mode 100644 index f99be50f44..0000000000 --- a/changelog.d/13116.doc +++ /dev/null @@ -1 +0,0 @@ -Fix wrong section header for `allow_public_rooms_over_federation` in the homeserver config documentation. diff --git a/changelog.d/13119.misc b/changelog.d/13119.misc deleted file mode 100644 index 3bb51962e7..0000000000 --- a/changelog.d/13119.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce DB usage of `/sync` when a large number of unread messages have recently been sent in a room. diff --git a/changelog.d/13125.feature b/changelog.d/13125.feature deleted file mode 100644 index 9b0f609541..0000000000 --- a/changelog.d/13125.feature +++ /dev/null @@ -1 +0,0 @@ -Add a rate limit for local users sending invites. \ No newline at end of file diff --git a/changelog.d/13127.misc b/changelog.d/13127.misc deleted file mode 100644 index 1414811e0a..0000000000 --- a/changelog.d/13127.misc +++ /dev/null @@ -1 +0,0 @@ -Improve startup times in Complement test runs against workers, particularly in CPU-constrained environments. 
\ No newline at end of file diff --git a/changelog.d/13129.misc b/changelog.d/13129.misc deleted file mode 100644 index 4c2dbb7057..0000000000 --- a/changelog.d/13129.misc +++ /dev/null @@ -1 +0,0 @@ -Only one-line SQL statements for logging and tracing. diff --git a/changelog.d/13131.bugfix b/changelog.d/13131.bugfix deleted file mode 100644 index 06602f03fe..0000000000 --- a/changelog.d/13131.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix application service not being able to join remote federated room without a profile set. diff --git a/changelog.d/13132.doc b/changelog.d/13132.doc deleted file mode 100644 index c577069294..0000000000 --- a/changelog.d/13132.doc +++ /dev/null @@ -1 +0,0 @@ -Document how the Synapse team does reviews. diff --git a/changelog.d/13134.misc b/changelog.d/13134.misc deleted file mode 100644 index e3e16056d1..0000000000 --- a/changelog.d/13134.misc +++ /dev/null @@ -1 +0,0 @@ -Apply ratelimiting earlier in processing of /send request. \ No newline at end of file diff --git a/changelog.d/13135.misc b/changelog.d/13135.misc deleted file mode 100644 index f096dd8749..0000000000 --- a/changelog.d/13135.misc +++ /dev/null @@ -1 +0,0 @@ -Enforce type annotations for `tests.test_server`. diff --git a/changelog.d/13136.misc b/changelog.d/13136.misc deleted file mode 100644 index 6cf451d8cf..0000000000 --- a/changelog.d/13136.misc +++ /dev/null @@ -1 +0,0 @@ -Add type annotations to `tests.server`. diff --git a/changelog.d/13139.doc b/changelog.d/13139.doc deleted file mode 100644 index f5d99d461a..0000000000 --- a/changelog.d/13139.doc +++ /dev/null @@ -1 +0,0 @@ -Add a link to the configuration manual from the homeserver sample config documentation. diff --git a/changelog.d/13143.misc b/changelog.d/13143.misc deleted file mode 100644 index 1cb77c02d7..0000000000 --- a/changelog.d/13143.misc +++ /dev/null @@ -1 +0,0 @@ -Add support to `complement.sh` for skipping the docker build. diff --git a/changelog.d/13144.misc b/changelog.d/13144.misc deleted file mode 100644 index 34762e2fcd..0000000000 --- a/changelog.d/13144.misc +++ /dev/null @@ -1 +0,0 @@ -Faster joins: skip waiting for full state when processing incoming events over federation. diff --git a/changelog.d/13145.misc b/changelog.d/13145.misc deleted file mode 100644 index d5e2dba866..0000000000 --- a/changelog.d/13145.misc +++ /dev/null @@ -1 +0,0 @@ -Improve exception handling when processing events received over federation. diff --git a/changelog.d/13148.feature b/changelog.d/13148.feature deleted file mode 100644 index d1104b04b0..0000000000 --- a/changelog.d/13148.feature +++ /dev/null @@ -1 +0,0 @@ -Improve validation logic in Synapse's REST endpoints. diff --git a/changelog.d/13151.misc b/changelog.d/13151.misc deleted file mode 100644 index cfe3eed3a1..0000000000 --- a/changelog.d/13151.misc +++ /dev/null @@ -1 +0,0 @@ -Faster room joins: fix race in recalculation of current room state. diff --git a/changelog.d/13152.misc b/changelog.d/13152.misc deleted file mode 100644 index 0c919ab700..0000000000 --- a/changelog.d/13152.misc +++ /dev/null @@ -1 +0,0 @@ -Add the ability to set the log level using the `SYNAPSE_TEST_LOG_LEVEL` environment when using `complement.sh`. \ No newline at end of file diff --git a/changelog.d/13153.misc b/changelog.d/13153.misc deleted file mode 100644 index 3bb51962e7..0000000000 --- a/changelog.d/13153.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce DB usage of `/sync` when a large number of unread messages have recently been sent in a room. 
diff --git a/changelog.d/13157.misc b/changelog.d/13157.misc deleted file mode 100644 index 0133097c83..0000000000 --- a/changelog.d/13157.misc +++ /dev/null @@ -1 +0,0 @@ -Enable Complement testing in the 'Twisted Trunk' CI runs. \ No newline at end of file diff --git a/changelog.d/13158.misc b/changelog.d/13158.misc deleted file mode 100644 index 1cb77c02d7..0000000000 --- a/changelog.d/13158.misc +++ /dev/null @@ -1 +0,0 @@ -Add support to `complement.sh` for skipping the docker build. diff --git a/changelog.d/13159.misc b/changelog.d/13159.misc deleted file mode 100644 index bb5554ebe0..0000000000 --- a/changelog.d/13159.misc +++ /dev/null @@ -1 +0,0 @@ -Improve and fix type hints. \ No newline at end of file diff --git a/changelog.d/13166.doc b/changelog.d/13166.doc deleted file mode 100644 index 2d92e341ed..0000000000 --- a/changelog.d/13166.doc +++ /dev/null @@ -1 +0,0 @@ -Add missing links to config options. diff --git a/changelog.d/13167.misc b/changelog.d/13167.misc deleted file mode 100644 index a7c7a688de..0000000000 --- a/changelog.d/13167.misc +++ /dev/null @@ -1 +0,0 @@ -Update config used by Complement to allow device name lookup over federation. \ No newline at end of file diff --git a/changelog.d/13174.bugfix b/changelog.d/13174.bugfix deleted file mode 100644 index b17935b93f..0000000000 --- a/changelog.d/13174.bugfix +++ /dev/null @@ -1 +0,0 @@ -Make use of the more robust `get_current_state` in `_get_state_map_for_room` to avoid breakages. diff --git a/changelog.d/13194.bugfix b/changelog.d/13194.bugfix deleted file mode 100644 index 2c2e8bb21b..0000000000 --- a/changelog.d/13194.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix bug where rows were not deleted from `event_push_actions` table on large servers. Introduced in v1.62.0. diff --git a/changelog.d/13195.misc b/changelog.d/13195.misc deleted file mode 100644 index 5506f767b3..0000000000 --- a/changelog.d/13195.misc +++ /dev/null @@ -1 +0,0 @@ -Check that `auto_vacuum` is disabled when porting a SQLite database to Postgres, as `VACUUM`s must not be performed between runs of the script. \ No newline at end of file diff --git a/changelog.d/13197.bugfix b/changelog.d/13197.bugfix deleted file mode 100644 index 8417241523..0000000000 --- a/changelog.d/13197.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix exception when using experimental [MSC3030](https://github.com/matrix-org/matrix-spec-proposals/pull/3030) `/timestamp_to_event` endpoint to look for remote federated imported events before room creation. diff --git a/changelog.d/13200.removal b/changelog.d/13200.removal deleted file mode 100644 index 755f5eb192..0000000000 --- a/changelog.d/13200.removal +++ /dev/null @@ -1 +0,0 @@ -Remove obsolete and for 8 years unused `RoomEventsStoreTestCase`. Contributed by @arkamar. diff --git a/changelog.d/13207.docker b/changelog.d/13207.docker deleted file mode 100644 index 63ba5c8031..0000000000 --- a/changelog.d/13207.docker +++ /dev/null @@ -1 +0,0 @@ -Bump the version of `lxml` in matrix.org Docker images Debian packages from 4.8.0 to 4.9.1. diff --git a/changelog.d/13209.misc b/changelog.d/13209.misc deleted file mode 100644 index cb0b8b4e63..0000000000 --- a/changelog.d/13209.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce number of queries used to get profile information. Contributed by Nick @ Beeper (@fizzadar). diff --git a/changelog.d/13210.misc b/changelog.d/13210.misc deleted file mode 100644 index 407791b8e5..0000000000 --- a/changelog.d/13210.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce number of events queried during room creation. 
Contributed by Nick @ Beeper (@fizzadar). diff --git a/changelog.d/13211.misc b/changelog.d/13211.misc deleted file mode 100644 index 4d2a6dec65..0000000000 --- a/changelog.d/13211.misc +++ /dev/null @@ -1 +0,0 @@ -More aggressively rotate push actions. diff --git a/changelog.d/13212.doc b/changelog.d/13212.doc deleted file mode 100644 index e6b65d826f..0000000000 --- a/changelog.d/13212.doc +++ /dev/null @@ -1 +0,0 @@ -Add documentation for the existing `databases` option in the homeserver configuration manual. diff --git a/changelog.d/13222.misc b/changelog.d/13222.misc deleted file mode 100644 index 3835e97ad9..0000000000 --- a/changelog.d/13222.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce memory consumption when processing incoming events in large rooms. diff --git a/changelog.d/13223.bugfix b/changelog.d/13223.bugfix deleted file mode 100644 index 6ee3aed910..0000000000 --- a/changelog.d/13223.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix bug where notification counts would get stuck after a highlighted message. Broke in v1.62.0. diff --git a/changelog.d/13226.bugfix b/changelog.d/13226.bugfix deleted file mode 100644 index df96d41f37..0000000000 --- a/changelog.d/13226.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a long-standing bug where the `synapse_port_db` script could fail to copy rows with negative row ids. diff --git a/changelog.d/13228.misc b/changelog.d/13228.misc deleted file mode 100644 index fec086557e..0000000000 --- a/changelog.d/13228.misc +++ /dev/null @@ -1 +0,0 @@ -Add `max_line_length` setting for Python files to the `.editorconfig`. Contributed by @sumnerevans @ Beeper. diff --git a/changelog.d/13235.bugfix b/changelog.d/13235.bugfix deleted file mode 100644 index 5c31fbc775..0000000000 --- a/changelog.d/13235.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix MSC3202-enabled appservices not receiving to-device messages, preventing messages from being decrypted. \ No newline at end of file diff --git a/changelog.d/13236.bugfix b/changelog.d/13236.bugfix deleted file mode 100644 index 7fddc4413d..0000000000 --- a/changelog.d/13236.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix appservices not receiving room-less EDUs, like presence, if enabled. \ No newline at end of file diff --git a/debian/changelog b/debian/changelog index 520d8d20ae..9f4352586d 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.63.0~rc1) stable; urgency=medium + + * New Synapse release 1.63.0rc1. + + -- Synapse Packaging team Tue, 12 Jul 2022 11:26:02 +0100 + matrix-synapse-py3 (1.62.0) stable; urgency=medium * New Synapse release 1.62.0. 
diff --git a/pyproject.toml b/pyproject.toml index 4d1007fcb3..f77c02ca27 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ skip_gitignore = true [tool.poetry] name = "matrix-synapse" -version = "1.62.0" +version = "1.63.0rc1" description = "Homeserver for the Matrix decentralised comms protocol" authors = ["Matrix.org Team and Contributors "] license = "Apache-2.0" From ac7aec0cd3d22e3013b167bc397ed67dec1a623b Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Tue, 12 Jul 2022 12:52:47 +0100 Subject: [PATCH 101/178] Reorder and tidy up changelog --- CHANGES.md | 56 +++++++++++++++++++++++++----------------------------- 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index ee2f90632f..c3d6a18a97 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,22 +4,22 @@ Synapse 1.63.0rc1 (2022-07-12) Features -------- -- Implement [MSC3827](https://github.com/matrix-org/matrix-spec-proposals/pull/3827): Filtering of /publicRooms by room type. ([\#13031](https://github.com/matrix-org/synapse/issues/13031)) - Add a rate limit for local users sending invites. ([\#13125](https://github.com/matrix-org/synapse/issues/13125)) +- Implement [MSC3827](https://github.com/matrix-org/matrix-spec-proposals/pull/3827): Filtering of /publicRooms by room type. ([\#13031](https://github.com/matrix-org/synapse/issues/13031)) - Improve validation logic in Synapse's REST endpoints. ([\#13148](https://github.com/matrix-org/synapse/issues/13148)) Bugfixes -------- -- Fix application service not being able to join remote federated room without a profile set. ([\#13131](https://github.com/matrix-org/synapse/issues/13131)) -- Make use of the more robust `get_current_state` in `_get_state_map_for_room` to avoid breakages. ([\#13174](https://github.com/matrix-org/synapse/issues/13174)) -- Fix bug where rows were not deleted from `event_push_actions` table on large servers. Introduced in v1.62.0. ([\#13194](https://github.com/matrix-org/synapse/issues/13194)) -- Fix exception when using experimental [MSC3030](https://github.com/matrix-org/matrix-spec-proposals/pull/3030) `/timestamp_to_event` endpoint to look for remote federated imported events before room creation. ([\#13197](https://github.com/matrix-org/synapse/issues/13197)) -- Fix bug where notification counts would get stuck after a highlighted message. Broke in v1.62.0. ([\#13223](https://github.com/matrix-org/synapse/issues/13223)) +- Fix a long-standing bug where application services were not able to join remote federated rooms without a profile. ([\#13131](https://github.com/matrix-org/synapse/issues/13131)) +- Fix a long-standing bug where `_get_state_map_for_room` might raise errors when third party event rules callbacks are present. ([\#13174](https://github.com/matrix-org/synapse/issues/13174)) - Fix a long-standing bug where the `synapse_port_db` script could fail to copy rows with negative row ids. ([\#13226](https://github.com/matrix-org/synapse/issues/13226)) -- Fix MSC3202-enabled appservices not receiving to-device messages, preventing messages from being decrypted. ([\#13235](https://github.com/matrix-org/synapse/issues/13235)) -- Fix appservices not receiving room-less EDUs, like presence, if enabled. ([\#13236](https://github.com/matrix-org/synapse/issues/13236)) +- Fix a bug introduced in 1.54.0 where appservices would not receive room-less EDUs, like presence, if enabled. 
([\#13236](https://github.com/matrix-org/synapse/issues/13236)) +- Fix a bug introduced in 1.62.0 where rows were not deleted from `event_push_actions` table on large servers. ([\#13194](https://github.com/matrix-org/synapse/issues/13194)) +- Fix a bug introduced in 1.62.0 where notification counts would get stuck after a highlighted message. ([\#13223](https://github.com/matrix-org/synapse/issues/13223)) +- Fix exception when using experimental [MSC3030](https://github.com/matrix-org/matrix-spec-proposals/pull/3030) `/timestamp_to_event` endpoint to look for remote federated imported events before room creation. ([\#13197](https://github.com/matrix-org/synapse/issues/13197)) +- Fix [MSC3202](https://github.com/matrix-org/matrix-spec-proposals/pull/3202)-enabled appservices not receiving to-device messages, preventing messages from being decrypted. ([\#13235](https://github.com/matrix-org/synapse/issues/13235)) Updates to the Docker image @@ -33,13 +33,12 @@ Improved Documentation - Add an explanation of the `--report-stats` argument to the docs. ([\#13029](https://github.com/matrix-org/synapse/issues/13029)) - Add a helpful example bash script to the contrib directory for creating multiple worker configuration files of the same type. Contributed by @villepeh. ([\#13032](https://github.com/matrix-org/synapse/issues/13032)) -- Clean up references to sample configuration and redirect users to the configuration manual instead. ([\#13077](https://github.com/matrix-org/synapse/issues/13077)) -- Add documentation for anonymised homeserver statistics collection. ([\#13086](https://github.com/matrix-org/synapse/issues/13086)) -- Fix wrong section header for `allow_public_rooms_over_federation` in the homeserver config documentation. ([\#13116](https://github.com/matrix-org/synapse/issues/13116)) -- Document how the Synapse team does reviews. ([\#13132](https://github.com/matrix-org/synapse/issues/13132)) -- Add a link to the configuration manual from the homeserver sample config documentation. ([\#13139](https://github.com/matrix-org/synapse/issues/13139)) - Add missing links to config options. ([\#13166](https://github.com/matrix-org/synapse/issues/13166)) +- Add documentation for anonymised homeserver statistics collection. ([\#13086](https://github.com/matrix-org/synapse/issues/13086)) - Add documentation for the existing `databases` option in the homeserver configuration manual. ([\#13212](https://github.com/matrix-org/synapse/issues/13212)) +- Clean up references to sample configuration and redirect users to the configuration manual instead. ([\#13077](https://github.com/matrix-org/synapse/issues/13077), [\#13139](https://github.com/matrix-org/synapse/issues/13139)) +- Document how the Synapse team does reviews. ([\#13132](https://github.com/matrix-org/synapse/issues/13132)) +- Fix wrong section header for `allow_public_rooms_over_federation` in the homeserver config documentation. ([\#13116](https://github.com/matrix-org/synapse/issues/13116)) Deprecations and Removals @@ -51,27 +50,24 @@ Deprecations and Removals Internal Changes ---------------- -- Add type annotations to `tests.utils`. ([\#13028](https://github.com/matrix-org/synapse/issues/13028)) -- Support temporary experimental return values for spam checker module callbacks. ([\#13044](https://github.com/matrix-org/synapse/issues/13044)) -- Reduce memory consumption when processing incoming events in large rooms. 
([\#13078](https://github.com/matrix-org/synapse/issues/13078), [\#13222](https://github.com/matrix-org/synapse/issues/13222)) -- Enable Complement testing in the 'Twisted Trunk' CI runs. ([\#13079](https://github.com/matrix-org/synapse/issues/13079), [\#13157](https://github.com/matrix-org/synapse/issues/13157)) -- Faster room joins: Handle race between persisting an event and un-partial stating a room. ([\#13100](https://github.com/matrix-org/synapse/issues/13100)) -- Add missing type hints to `synapse.logging`. ([\#13103](https://github.com/matrix-org/synapse/issues/13103)) -- Raise a `DependencyError` on missing dependencies instead of a `ConfigError`. ([\#13113](https://github.com/matrix-org/synapse/issues/13113)) -- Reduce DB usage of `/sync` when a large number of unread messages have recently been sent in a room. ([\#13119](https://github.com/matrix-org/synapse/issues/13119), [\#13153](https://github.com/matrix-org/synapse/issues/13153)) -- Improve startup times in Complement test runs against workers, particularly in CPU-constrained environments. ([\#13127](https://github.com/matrix-org/synapse/issues/13127)) -- Only one-line SQL statements for logging and tracing. ([\#13129](https://github.com/matrix-org/synapse/issues/13129)) -- Apply ratelimiting earlier in processing of /send request. ([\#13134](https://github.com/matrix-org/synapse/issues/13134)) +- Add type annotations to `synapse.logging`, `tests.server` and `tests.utils`. ([\#13028](https://github.com/matrix-org/synapse/issues/13028), [\#13103](https://github.com/matrix-org/synapse/issues/13103), [\#13159](https://github.com/matrix-org/synapse/issues/13159), [\#13136](https://github.com/matrix-org/synapse/issues/13136)) - Enforce type annotations for `tests.test_server`. ([\#13135](https://github.com/matrix-org/synapse/issues/13135)) -- Add type annotations to `tests.server`. ([\#13136](https://github.com/matrix-org/synapse/issues/13136)) +- Support temporary experimental return values for spam checker module callbacks. ([\#13044](https://github.com/matrix-org/synapse/issues/13044)) - Add support to `complement.sh` for skipping the docker build. ([\#13143](https://github.com/matrix-org/synapse/issues/13143), [\#13158](https://github.com/matrix-org/synapse/issues/13158)) -- Faster joins: skip waiting for full state when processing incoming events over federation. ([\#13144](https://github.com/matrix-org/synapse/issues/13144)) -- Improve exception handling when processing events received over federation. ([\#13145](https://github.com/matrix-org/synapse/issues/13145)) -- Faster room joins: fix race in recalculation of current room state. ([\#13151](https://github.com/matrix-org/synapse/issues/13151)) -- Add the ability to set the log level using the `SYNAPSE_TEST_LOG_LEVEL` environment when using `complement.sh`. ([\#13152](https://github.com/matrix-org/synapse/issues/13152)) -- Improve and fix type hints. ([\#13159](https://github.com/matrix-org/synapse/issues/13159)) +- Add support to `complement.sh` for setting the log level using the `SYNAPSE_TEST_LOG_LEVEL` environment variable. ([\#13152](https://github.com/matrix-org/synapse/issues/13152)) +- Enable Complement testing in the 'Twisted Trunk' CI runs. ([\#13079](https://github.com/matrix-org/synapse/issues/13079), [\#13157](https://github.com/matrix-org/synapse/issues/13157)) +- Improve startup times in Complement test runs against workers, particularly in CPU-constrained environments. 
([\#13127](https://github.com/matrix-org/synapse/issues/13127)) - Update config used by Complement to allow device name lookup over federation. ([\#13167](https://github.com/matrix-org/synapse/issues/13167)) +- Faster room joins: handle race between persisting an event and un-partial stating a room. ([\#13100](https://github.com/matrix-org/synapse/issues/13100)) +- Faster room joins: fix race in recalculation of current room state. ([\#13151](https://github.com/matrix-org/synapse/issues/13151)) +- Faster room joins: skip waiting for full state when processing incoming events over federation. ([\#13144](https://github.com/matrix-org/synapse/issues/13144)) +- Raise a `DependencyError` on missing dependencies instead of a `ConfigError`. ([\#13113](https://github.com/matrix-org/synapse/issues/13113)) +- Avoid stripping line breaks from SQL sent to the database. ([\#13129](https://github.com/matrix-org/synapse/issues/13129)) +- Apply ratelimiting earlier in processing of `/send` requests. ([\#13134](https://github.com/matrix-org/synapse/issues/13134)) +- Improve exception handling when processing events received over federation. ([\#13145](https://github.com/matrix-org/synapse/issues/13145)) - Check that `auto_vacuum` is disabled when porting a SQLite database to Postgres, as `VACUUM`s must not be performed between runs of the script. ([\#13195](https://github.com/matrix-org/synapse/issues/13195)) +- Reduce DB usage of `/sync` when a large number of unread messages have recently been sent in a room. ([\#13119](https://github.com/matrix-org/synapse/issues/13119), [\#13153](https://github.com/matrix-org/synapse/issues/13153)) +- Reduce memory consumption when processing incoming events in large rooms. ([\#13078](https://github.com/matrix-org/synapse/issues/13078), [\#13222](https://github.com/matrix-org/synapse/issues/13222)) - Reduce number of queries used to get profile information. Contributed by Nick @ Beeper (@fizzadar). ([\#13209](https://github.com/matrix-org/synapse/issues/13209)) - Reduce number of events queried during room creation. Contributed by Nick @ Beeper (@fizzadar). ([\#13210](https://github.com/matrix-org/synapse/issues/13210)) - More aggressively rotate push actions. ([\#13211](https://github.com/matrix-org/synapse/issues/13211)) From f14c63213492f512f451b295331a18dbb28685c6 Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Tue, 12 Jul 2022 13:01:42 +0100 Subject: [PATCH 102/178] Update changelog once more --- CHANGES.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index c3d6a18a97..4071f973de 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -5,7 +5,7 @@ Features -------- - Add a rate limit for local users sending invites. ([\#13125](https://github.com/matrix-org/synapse/issues/13125)) -- Implement [MSC3827](https://github.com/matrix-org/matrix-spec-proposals/pull/3827): Filtering of /publicRooms by room type. ([\#13031](https://github.com/matrix-org/synapse/issues/13031)) +- Implement [MSC3827](https://github.com/matrix-org/matrix-spec-proposals/pull/3827): Filtering of `/publicRooms` by room type. ([\#13031](https://github.com/matrix-org/synapse/issues/13031)) - Improve validation logic in Synapse's REST endpoints. ([\#13148](https://github.com/matrix-org/synapse/issues/13148)) @@ -15,7 +15,7 @@ Bugfixes - Fix a long-standing bug where application services were not able to join remote federated rooms without a profile. 
([\#13131](https://github.com/matrix-org/synapse/issues/13131)) - Fix a long-standing bug where `_get_state_map_for_room` might raise errors when third party event rules callbacks are present. ([\#13174](https://github.com/matrix-org/synapse/issues/13174)) - Fix a long-standing bug where the `synapse_port_db` script could fail to copy rows with negative row ids. ([\#13226](https://github.com/matrix-org/synapse/issues/13226)) -- Fix a bug introduced in 1.54.0 where appservices would not receive room-less EDUs, like presence, if enabled. ([\#13236](https://github.com/matrix-org/synapse/issues/13236)) +- Fix a bug introduced in 1.54.0 where appservices would not receive room-less EDUs, like presence, when both [MSC2409](https://github.com/matrix-org/matrix-spec-proposals/pull/2409) and [MSC3202](https://github.com/matrix-org/matrix-spec-proposals/pull/3202) are enabled. ([\#13236](https://github.com/matrix-org/synapse/issues/13236)) - Fix a bug introduced in 1.62.0 where rows were not deleted from `event_push_actions` table on large servers. ([\#13194](https://github.com/matrix-org/synapse/issues/13194)) - Fix a bug introduced in 1.62.0 where notification counts would get stuck after a highlighted message. ([\#13223](https://github.com/matrix-org/synapse/issues/13223)) - Fix exception when using experimental [MSC3030](https://github.com/matrix-org/matrix-spec-proposals/pull/3030) `/timestamp_to_event` endpoint to look for remote federated imported events before room creation. ([\#13197](https://github.com/matrix-org/synapse/issues/13197)) From 2d82cdafd23b5bcb597e776537e23c367e18d4ac Mon Sep 17 00:00:00 2001 From: andrew do Date: Tue, 12 Jul 2022 07:30:53 -0700 Subject: [PATCH 103/178] expose whether a room is a space in the Admin API (#13208) --- changelog.d/13208.feature | 1 + docs/admin_api/rooms.md | 29 +++++++++++++++++++------- synapse/storage/databases/main/room.py | 6 ++++-- tests/rest/admin/test_room.py | 13 +++++++++--- 4 files changed, 36 insertions(+), 13 deletions(-) create mode 100644 changelog.d/13208.feature diff --git a/changelog.d/13208.feature b/changelog.d/13208.feature new file mode 100644 index 0000000000..b0c5f090ee --- /dev/null +++ b/changelog.d/13208.feature @@ -0,0 +1 @@ +Add a `room_type` field in the responses for the list room and room details admin API. Contributed by @andrewdoh. \ No newline at end of file diff --git a/docs/admin_api/rooms.md b/docs/admin_api/rooms.md index d4873f9490..9aa489e4a3 100644 --- a/docs/admin_api/rooms.md +++ b/docs/admin_api/rooms.md @@ -59,6 +59,7 @@ The following fields are possible in the JSON response body: - `guest_access` - Whether guests can join the room. One of: ["can_join", "forbidden"]. - `history_visibility` - Who can see the room history. One of: ["invited", "joined", "shared", "world_readable"]. - `state_events` - Total number of state_events of a room. Complexity of the room. + - `room_type` - The type of the room taken from the room's creation event; for example "m.space" if the room is a space. If the room does not define a type, the value will be `null`. * `offset` - The current pagination offset in rooms. This parameter should be used instead of `next_token` for room offset as `next_token` is not intended to be parsed. @@ -101,7 +102,8 @@ A response body like the following is returned: "join_rules": "invite", "guest_access": null, "history_visibility": "shared", - "state_events": 93534 + "state_events": 93534, + "room_type": "m.space" }, ... (8 hidden items) ... 
{ @@ -118,7 +120,8 @@ A response body like the following is returned: "join_rules": "invite", "guest_access": null, "history_visibility": "shared", - "state_events": 8345 + "state_events": 8345, + "room_type": null } ], "offset": 0, @@ -151,7 +154,8 @@ A response body like the following is returned: "join_rules": "invite", "guest_access": null, "history_visibility": "shared", - "state_events": 8 + "state_events": 8, + "room_type": null } ], "offset": 0, @@ -184,7 +188,8 @@ A response body like the following is returned: "join_rules": "invite", "guest_access": null, "history_visibility": "shared", - "state_events": 93534 + "state_events": 93534, + "room_type": null }, ... (98 hidden items) ... { @@ -201,7 +206,8 @@ A response body like the following is returned: "join_rules": "invite", "guest_access": null, "history_visibility": "shared", - "state_events": 8345 + "state_events": 8345, + "room_type": "m.space" } ], "offset": 0, @@ -238,7 +244,9 @@ A response body like the following is returned: "join_rules": "invite", "guest_access": null, "history_visibility": "shared", - "state_events": 93534 + "state_events": 93534, + "room_type": "m.space" + }, ... (48 hidden items) ... { @@ -255,7 +263,9 @@ A response body like the following is returned: "join_rules": "invite", "guest_access": null, "history_visibility": "shared", - "state_events": 8345 + "state_events": 8345, + "room_type": null + } ], "offset": 100, @@ -290,6 +300,8 @@ The following fields are possible in the JSON response body: * `guest_access` - Whether guests can join the room. One of: ["can_join", "forbidden"]. * `history_visibility` - Who can see the room history. One of: ["invited", "joined", "shared", "world_readable"]. * `state_events` - Total number of state_events of a room. Complexity of the room. +* `room_type` - The type of the room taken from the room's creation event; for example "m.space" if the room is a space. + If the room does not define a type, the value will be `null`. 
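As an aside on the new `room_type` field: a small admin script along the following lines could use it to pick out spaces from the room list. This is an illustrative sketch rather than part of the patch; it assumes the standard list-rooms admin endpoint (`GET /_synapse/admin/v1/rooms`), the response shape described in `docs/admin_api/rooms.md`, the third-party `requests` library, and placeholder values for the homeserver URL and admin access token.

```python
# Illustrative sketch, not part of the patch: list all rooms whose creation
# event declared them as spaces, using the new `room_type` field.
import requests

BASE_URL = "https://synapse.example.com"  # placeholder homeserver URL
ADMIN_TOKEN = "..."                       # placeholder admin access token


def list_space_ids():
    """Return the room IDs of rooms reported as spaces by the admin API."""
    resp = requests.get(
        f"{BASE_URL}/_synapse/admin/v1/rooms",
        headers={"Authorization": f"Bearer {ADMIN_TOKEN}"},
    )
    resp.raise_for_status()
    rooms = resp.json()["rooms"]
    # `room_type` is "m.space" for spaces and null (None) for ordinary rooms.
    return [room["room_id"] for room in rooms if room.get("room_type") == "m.space"]
```

The same field is added to the room details response below, so a per-room lookup works equally well.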
The API is: @@ -317,7 +329,8 @@ A response body like the following is returned: "join_rules": "invite", "guest_access": null, "history_visibility": "shared", - "state_events": 93534 + "state_events": 93534, + "room_type": "m.space" } ``` diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 13d6a1d5c0..d6d485507b 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -175,7 +175,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): rooms.creator, state.encryption, state.is_federatable AS federatable, rooms.is_public AS public, state.join_rules, state.guest_access, state.history_visibility, curr.current_state_events AS state_events, - state.avatar, state.topic + state.avatar, state.topic, state.room_type FROM rooms LEFT JOIN room_stats_state state USING (room_id) LEFT JOIN room_stats_current curr USING (room_id) @@ -596,7 +596,8 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): SELECT state.room_id, state.name, state.canonical_alias, curr.joined_members, curr.local_users_in_room, rooms.room_version, rooms.creator, state.encryption, state.is_federatable, rooms.is_public, state.join_rules, - state.guest_access, state.history_visibility, curr.current_state_events + state.guest_access, state.history_visibility, curr.current_state_events, + state.room_type FROM room_stats_state state INNER JOIN room_stats_current curr USING (room_id) INNER JOIN rooms USING (room_id) @@ -646,6 +647,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): "guest_access": room[11], "history_visibility": room[12], "state_events": room[13], + "room_type": room[14], } ) diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index 230dc76f72..2526136ff8 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -21,7 +21,7 @@ from parameterized import parameterized from twisted.test.proto_helpers import MemoryReactor import synapse.rest.admin -from synapse.api.constants import EventTypes, Membership +from synapse.api.constants import EventTypes, Membership, RoomTypes from synapse.api.errors import Codes from synapse.handlers.pagination import PaginationHandler from synapse.rest.client import directory, events, login, room @@ -1130,6 +1130,8 @@ class RoomTestCase(unittest.HomeserverTestCase): self.assertIn("guest_access", r) self.assertIn("history_visibility", r) self.assertIn("state_events", r) + self.assertIn("room_type", r) + self.assertIsNone(r["room_type"]) # Check that the correct number of total rooms was returned self.assertEqual(channel.json_body["total_rooms"], total_rooms) @@ -1229,7 +1231,11 @@ class RoomTestCase(unittest.HomeserverTestCase): def test_correct_room_attributes(self) -> None: """Test the correct attributes for a room are returned""" # Create a test room - room_id = self.helper.create_room_as(self.admin_user, tok=self.admin_user_tok) + room_id = self.helper.create_room_as( + self.admin_user, + tok=self.admin_user_tok, + extra_content={"creation_content": {"type": RoomTypes.SPACE}}, + ) test_alias = "#test:test" test_room_name = "something" @@ -1306,6 +1312,7 @@ class RoomTestCase(unittest.HomeserverTestCase): self.assertEqual(room_id, r["room_id"]) self.assertEqual(test_room_name, r["name"]) self.assertEqual(test_alias, r["canonical_alias"]) + self.assertEqual(RoomTypes.SPACE, r["room_type"]) def test_room_list_sort_order(self) -> None: """Test room list sort ordering. 
alphabetical name versus number of members, @@ -1630,7 +1637,7 @@ class RoomTestCase(unittest.HomeserverTestCase): self.assertIn("guest_access", channel.json_body) self.assertIn("history_visibility", channel.json_body) self.assertIn("state_events", channel.json_body) - + self.assertIn("room_type", channel.json_body) self.assertEqual(room_id_1, channel.json_body["room_id"]) def test_single_room_devices(self) -> None: From b19060a29b4f73897847db2aba5d03ec819086e0 Mon Sep 17 00:00:00 2001 From: Quentin Gliech Date: Tue, 12 Jul 2022 12:06:29 -0500 Subject: [PATCH 104/178] Make the AS login method call `Auth.get_user_by_req` for checking the AS token. (#13094) This gets rid of another usage of get_appservice_by_req, with all the benefits, including correctly tracking the appservice IP and setting the tracing attributes correctly. Signed-off-by: Quentin Gliech --- changelog.d/13094.misc | 1 + synapse/rest/client/login.py | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13094.misc diff --git a/changelog.d/13094.misc b/changelog.d/13094.misc new file mode 100644 index 0000000000..f1e55ae476 --- /dev/null +++ b/changelog.d/13094.misc @@ -0,0 +1 @@ +Make the AS login method call `Auth.get_user_by_req` for checking the AS token. diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py index dd75e40f34..0437c87d8d 100644 --- a/synapse/rest/client/login.py +++ b/synapse/rest/client/login.py @@ -28,7 +28,7 @@ from typing import ( from typing_extensions import TypedDict -from synapse.api.errors import Codes, LoginError, SynapseError +from synapse.api.errors import Codes, InvalidClientTokenError, LoginError, SynapseError from synapse.api.ratelimiting import Ratelimiter from synapse.api.urls import CLIENT_API_PREFIX from synapse.appservice import ApplicationService @@ -172,7 +172,13 @@ class LoginRestServlet(RestServlet): try: if login_submission["type"] == LoginRestServlet.APPSERVICE_TYPE: - appservice = self.auth.get_appservice_by_req(request) + requester = await self.auth.get_user_by_req(request) + appservice = requester.app_service + + if appservice is None: + raise InvalidClientTokenError( + "This login method is only valid for application services" + ) if appservice.is_rate_limited(): await self._address_ratelimiter.ratelimit( From 6f30eb5b8e3d88d1b44cc7f9d7e548b30081d7e6 Mon Sep 17 00:00:00 2001 From: Shay Date: Tue, 12 Jul 2022 10:48:47 -0700 Subject: [PATCH 105/178] Add info about configuration in the url preview docs (#13233) Cross-link doc pages for easier navigation. --- changelog.d/13233.doc | 2 ++ docs/development/url_previews.md | 1 + 2 files changed, 3 insertions(+) create mode 100644 changelog.d/13233.doc diff --git a/changelog.d/13233.doc b/changelog.d/13233.doc new file mode 100644 index 0000000000..b6babd7f15 --- /dev/null +++ b/changelog.d/13233.doc @@ -0,0 +1,2 @@ +Add a link to configuration instructions in the URL preview documentation. + diff --git a/docs/development/url_previews.md b/docs/development/url_previews.md index 154b9a5e12..25f189683d 100644 --- a/docs/development/url_previews.md +++ b/docs/development/url_previews.md @@ -1,5 +1,6 @@ URL Previews ============ +For information on how to enable URL previews in synapse, please see the [config manual](../usage/configuration/config_documentation.md#url_preview_enabled). 
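To make the cross-reference above more concrete, a preview request might look roughly like the following sketch. It is not taken from the Synapse code or docs; it assumes URL previews are enabled on the homeserver, uses the third-party `requests` library, and uses placeholder values for the base URL and access token.

```python
# Illustrative sketch, not part of the patch: fetch Open Graph style data for a
# URL via the preview endpoint described below.
import requests

BASE_URL = "https://synapse.example.com"  # placeholder homeserver URL
ACCESS_TOKEN = "..."                      # placeholder access token


def preview_url(url):
    """Ask the homeserver for an Open Graph style preview of `url`."""
    resp = requests.get(
        f"{BASE_URL}/_matrix/media/r0/preview_url",
        params={"url": url},
        headers={"Authorization": f"Bearer {ACCESS_TOKEN}"},
    )
    resp.raise_for_status()
    return resp.json()  # keys such as "og:title", "og:description", etc.


# Example usage: preview_url("https://matrix.org")
```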
The `GET /_matrix/media/r0/preview_url` endpoint provides a generic preview API for URLs which outputs [Open Graph](https://ogp.me/) responses (with some Matrix From 3f178332d68cb723150d0e392ed92780e4e5a610 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Tue, 12 Jul 2022 18:57:38 +0100 Subject: [PATCH 106/178] Log the stack when waiting for an entire room to be un-partial stated (#13257) The stack is already logged when waiting for an event to be un-partial stated. Log the stack for rooms as well, to aid in debugging. --- changelog.d/13257.misc | 1 + synapse/storage/util/partial_state_events_tracker.py | 1 + 2 files changed, 2 insertions(+) create mode 100644 changelog.d/13257.misc diff --git a/changelog.d/13257.misc b/changelog.d/13257.misc new file mode 100644 index 0000000000..5fc1388520 --- /dev/null +++ b/changelog.d/13257.misc @@ -0,0 +1 @@ +Log the stack when waiting for an entire room to be un-partial stated. diff --git a/synapse/storage/util/partial_state_events_tracker.py b/synapse/storage/util/partial_state_events_tracker.py index 211437cfaa..466e5137f2 100644 --- a/synapse/storage/util/partial_state_events_tracker.py +++ b/synapse/storage/util/partial_state_events_tracker.py @@ -166,6 +166,7 @@ class PartialCurrentStateTracker: logger.info( "Awaiting un-partial-stating of room %s", room_id, + stack_info=True, ) await make_deferred_yieldable(d) From fa71bb18b527d1a3e2629b48640ea67fff2f8c59 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 12 Jul 2022 19:18:53 +0100 Subject: [PATCH 107/178] Drop support for delegating email validation (#13192) * Drop support for delegating email validation Delegating email validation to an IS is insecure (since it allows the owner of the IS to do a password reset on your HS), and has long been deprecated. It will now cause a config error at startup. * Update unit test which checks for email verification Give it an `email` config instead of a threepid delegate * Remove unused method `requestEmailToken` * Simplify config handling for email verification Rather than an enum and a boolean, all we need here is a single bool, which says whether we are or are not doing email verification. * update docs * changelog * upgrade.md: fix typo * update version number this will be in 1.64, not 1.63 * update version number this one too --- CHANGES.md | 7 +- changelog.d/13192.removal | 1 + docs/upgrade.md | 15 +++ .../configuration/config_documentation.md | 28 ++--- synapse/app/homeserver.py | 3 +- synapse/config/emailconfig.py | 45 +------- synapse/config/registration.py | 11 +- synapse/handlers/identity.py | 56 +-------- synapse/handlers/ui_auth/checkers.py | 21 +--- synapse/rest/client/account.py | 107 ++++++------------ synapse/rest/client/register.py | 59 ++++------ synapse/rest/synapse/client/password_reset.py | 8 +- tests/rest/client/test_register.py | 2 +- 13 files changed, 110 insertions(+), 253 deletions(-) create mode 100644 changelog.d/13192.removal diff --git a/CHANGES.md b/CHANGES.md index 4071f973de..bcf9fae4a5 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,8 @@ +Synapse vNext +============= + +As of this release, Synapse no longer allows the tasks of verifying email address ownership, and password reset confirmation, to be delegated to an identity server. For more information, see the [upgrade notes](https://github.com/matrix-org/synapse/blob/release-v1.63/docs/upgrade.md#upgrading-to-v1630). 
+ Synapse 1.63.0rc1 (2022-07-12) ============================== @@ -73,7 +78,6 @@ Internal Changes - More aggressively rotate push actions. ([\#13211](https://github.com/matrix-org/synapse/issues/13211)) - Add `max_line_length` setting for Python files to the `.editorconfig`. Contributed by @sumnerevans @ Beeper. ([\#13228](https://github.com/matrix-org/synapse/issues/13228)) - Synapse 1.62.0 (2022-07-05) =========================== @@ -81,7 +85,6 @@ No significant changes since 1.62.0rc3. Authors of spam-checker plugins should consult the [upgrade notes](https://github.com/matrix-org/synapse/blob/release-v1.62/docs/upgrade.md#upgrading-to-v1620) to learn about the enriched signatures for spam checker callbacks, which are supported with this release of Synapse. - Synapse 1.62.0rc3 (2022-07-04) ============================== diff --git a/changelog.d/13192.removal b/changelog.d/13192.removal new file mode 100644 index 0000000000..a7dffd1c48 --- /dev/null +++ b/changelog.d/13192.removal @@ -0,0 +1 @@ +Drop support for delegating email verification to an external server. diff --git a/docs/upgrade.md b/docs/upgrade.md index 312f0b87fe..3aaeb499ce 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -89,6 +89,21 @@ process, for example: dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb ``` +# Upgrading to v1.64.0 + +## Delegation of email validation no longer supported + +As of this version, Synapse no longer allows the tasks of verifying email address +ownership, and password reset confirmation, to be delegated to an identity server. + +To continue to allow users to add email addresses to their homeserver accounts, +and perform password resets, make sure that Synapse is configured with a +working email server in the `email` configuration section (including, at a +minimum, a `notif_from` setting.) + +Specifying an `email` setting under `account_threepid_delegates` will now cause +an error at startup. + # Upgrading to v1.62.0 ## New signatures for spam checker callbacks diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 5deabb53d7..5fe502e33a 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -2168,30 +2168,26 @@ default_identity_server: https://matrix.org --- ### `account_threepid_delegates` -Handle threepid (email/phone etc) registration and password resets through a set of -*trusted* identity servers. Note that this allows the configured identity server to -reset passwords for accounts! +Delegate verification of phone numbers to an identity server. -Be aware that if `email` is not set, and SMTP options have not been -configured in the email config block, registration and user password resets via -email will be globally disabled. +When a user wishes to add a phone number to their account, we need to verify that they +actually own that phone number, which requires sending them a text message (SMS). +Currently Synapse does not support sending those texts itself and instead delegates the +task to an identity server. The base URI for the identity server to be used is +specified by the `account_threepid_delegates.msisdn` option. -Additionally, if `msisdn` is not set, registration and password resets via msisdn -will be disabled regardless, and users will not be able to associate an msisdn -identifier to their account. This is due to Synapse currently not supporting -any method of sending SMS messages on its own. 
+If this is left unspecified, Synapse will not allow users to add phone numbers to +their account. -To enable using an identity server for operations regarding a particular third-party -identifier type, set the value to the URL of that identity server as shown in the -examples below. +(Servers handling the these requests must answer the `/requestToken` endpoints defined +by the Matrix Identity Service API +[specification](https://matrix.org/docs/spec/identity_service/latest).) -Servers handling the these requests must answer the `/requestToken` endpoints defined -by the Matrix Identity Service API [specification](https://matrix.org/docs/spec/identity_service/latest). +*Updated in Synapse 1.64.0*: No longer accepts an `email` option. Example configuration: ```yaml account_threepid_delegates: - email: https://example.com # Delegate email sending to example.com msisdn: http://localhost:8090 # Delegate SMS sending to this local process ``` --- diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 745e704141..6bafa7d3f3 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -44,7 +44,6 @@ from synapse.app._base import ( register_start, ) from synapse.config._base import ConfigError, format_config_error -from synapse.config.emailconfig import ThreepidBehaviour from synapse.config.homeserver import HomeServerConfig from synapse.config.server import ListenerConfig from synapse.federation.transport.server import TransportLayerServer @@ -202,7 +201,7 @@ class SynapseHomeServer(HomeServer): } ) - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.can_verify_email: from synapse.rest.synapse.client.password_reset import ( PasswordResetSubmitTokenResource, ) diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py index 6e11fbdb9a..3ead80d985 100644 --- a/synapse/config/emailconfig.py +++ b/synapse/config/emailconfig.py @@ -18,7 +18,6 @@ import email.utils import logging import os -from enum import Enum from typing import Any import attr @@ -131,41 +130,22 @@ class EmailConfig(Config): self.email_enable_notifs = email_config.get("enable_notifs", False) - self.threepid_behaviour_email = ( - # Have Synapse handle the email sending if account_threepid_delegates.email - # is not defined - # msisdn is currently always remote while Synapse does not support any method of - # sending SMS messages - ThreepidBehaviour.REMOTE - if self.root.registration.account_threepid_delegate_email - else ThreepidBehaviour.LOCAL - ) - if config.get("trust_identity_server_for_password_resets"): raise ConfigError( 'The config option "trust_identity_server_for_password_resets" ' - 'has been replaced by "account_threepid_delegate". ' - "Please consult the configuration manual at docs/usage/configuration/config_documentation.md for " - "details and update your config file." + "is no longer supported. Please remove it from the config file." ) - self.local_threepid_handling_disabled_due_to_email_config = False - if ( - self.threepid_behaviour_email == ThreepidBehaviour.LOCAL - and email_config == {} - ): - # We cannot warn the user this has happened here - # Instead do so when a user attempts to reset their password - self.local_threepid_handling_disabled_due_to_email_config = True - - self.threepid_behaviour_email = ThreepidBehaviour.OFF + # If we have email config settings, assume that we can verify ownership of + # email addresses. 
+ self.can_verify_email = email_config != {} # Get lifetime of a validation token in milliseconds self.email_validation_token_lifetime = self.parse_duration( email_config.get("validation_token_lifetime", "1h") ) - if self.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.can_verify_email: missing = [] if not self.email_notif_from: missing.append("email.notif_from") @@ -356,18 +336,3 @@ class EmailConfig(Config): "Config option email.invite_client_location must be a http or https URL", path=("email", "invite_client_location"), ) - - -class ThreepidBehaviour(Enum): - """ - Enum to define the behaviour of Synapse with regards to when it contacts an identity - server for 3pid registration and password resets - - REMOTE = use an external server to send tokens - LOCAL = send tokens ourselves - OFF = disable registration via 3pid and password resets - """ - - REMOTE = "remote" - LOCAL = "local" - OFF = "off" diff --git a/synapse/config/registration.py b/synapse/config/registration.py index fcf99be092..685a0423c5 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -20,6 +20,13 @@ from synapse.config._base import Config, ConfigError from synapse.types import JsonDict, RoomAlias, UserID from synapse.util.stringutils import random_string_with_symbols, strtobool +NO_EMAIL_DELEGATE_ERROR = """\ +Delegation of email verification to an identity server is no longer supported. To +continue to allow users to add email addresses to their accounts, and use them for +password resets, configure Synapse with an SMTP server via the `email` setting, and +remove `account_threepid_delegates.email`. +""" + class RegistrationConfig(Config): section = "registration" @@ -51,7 +58,9 @@ class RegistrationConfig(Config): self.bcrypt_rounds = config.get("bcrypt_rounds", 12) account_threepid_delegates = config.get("account_threepid_delegates") or {} - self.account_threepid_delegate_email = account_threepid_delegates.get("email") + if "email" in account_threepid_delegates: + raise ConfigError(NO_EMAIL_DELEGATE_ERROR) + # self.account_threepid_delegate_email = account_threepid_delegates.get("email") self.account_threepid_delegate_msisdn = account_threepid_delegates.get("msisdn") self.default_identity_server = config.get("default_identity_server") self.allow_guest_access = config.get("allow_guest_access", False) diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 9bca2bc4b2..c70fdcc85e 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -26,7 +26,6 @@ from synapse.api.errors import ( SynapseError, ) from synapse.api.ratelimiting import Ratelimiter -from synapse.config.emailconfig import ThreepidBehaviour from synapse.http import RequestTimedOutError from synapse.http.client import SimpleHttpClient from synapse.http.site import SynapseRequest @@ -434,48 +433,6 @@ class IdentityHandler: return session_id - async def requestEmailToken( - self, - id_server: str, - email: str, - client_secret: str, - send_attempt: int, - next_link: Optional[str] = None, - ) -> JsonDict: - """ - Request an external server send an email on our behalf for the purposes of threepid - validation. 
- - Args: - id_server: The identity server to proxy to - email: The email to send the message to - client_secret: The unique client_secret sends by the user - send_attempt: Which attempt this is - next_link: A link to redirect the user to once they submit the token - - Returns: - The json response body from the server - """ - params = { - "email": email, - "client_secret": client_secret, - "send_attempt": send_attempt, - } - if next_link: - params["next_link"] = next_link - - try: - data = await self.http_client.post_json_get_json( - id_server + "/_matrix/identity/api/v1/validate/email/requestToken", - params, - ) - return data - except HttpResponseException as e: - logger.info("Proxied requestToken failed: %r", e) - raise e.to_synapse_error() - except RequestTimedOutError: - raise SynapseError(500, "Timed out contacting identity server") - async def requestMsisdnToken( self, id_server: str, @@ -549,18 +506,7 @@ class IdentityHandler: validation_session = None # Try to validate as email - if self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - # Remote emails will only be used if a valid identity server is provided. - assert ( - self.hs.config.registration.account_threepid_delegate_email is not None - ) - - # Ask our delegated email identity server - validation_session = await self.threepid_from_creds( - self.hs.config.registration.account_threepid_delegate_email, - threepid_creds, - ) - elif self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.hs.config.email.can_verify_email: # Get a validated session matching these details validation_session = await self.store.get_threepid_validation_session( "email", client_secret, sid=sid, validated=True diff --git a/synapse/handlers/ui_auth/checkers.py b/synapse/handlers/ui_auth/checkers.py index 05cebb5d4d..a744d68c64 100644 --- a/synapse/handlers/ui_auth/checkers.py +++ b/synapse/handlers/ui_auth/checkers.py @@ -19,7 +19,6 @@ from twisted.web.client import PartialDownloadError from synapse.api.constants import LoginType from synapse.api.errors import Codes, LoginError, SynapseError -from synapse.config.emailconfig import ThreepidBehaviour from synapse.util import json_decoder if TYPE_CHECKING: @@ -153,7 +152,7 @@ class _BaseThreepidAuthChecker: logger.info("Getting validated threepid. 
threepidcreds: %r", (threepid_creds,)) - # msisdns are currently always ThreepidBehaviour.REMOTE + # msisdns are currently always verified via the IS if medium == "msisdn": if not self.hs.config.registration.account_threepid_delegate_msisdn: raise SynapseError( @@ -164,18 +163,7 @@ class _BaseThreepidAuthChecker: threepid_creds, ) elif medium == "email": - if ( - self.hs.config.email.threepid_behaviour_email - == ThreepidBehaviour.REMOTE - ): - assert self.hs.config.registration.account_threepid_delegate_email - threepid = await identity_handler.threepid_from_creds( - self.hs.config.registration.account_threepid_delegate_email, - threepid_creds, - ) - elif ( - self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL - ): + if self.hs.config.email.can_verify_email: threepid = None row = await self.store.get_threepid_validation_session( medium, @@ -227,10 +215,7 @@ class EmailIdentityAuthChecker(UserInteractiveAuthChecker, _BaseThreepidAuthChec _BaseThreepidAuthChecker.__init__(self, hs) def is_enabled(self) -> bool: - return self.hs.config.email.threepid_behaviour_email in ( - ThreepidBehaviour.REMOTE, - ThreepidBehaviour.LOCAL, - ) + return self.hs.config.email.can_verify_email async def check_auth(self, authdict: dict, clientip: str) -> Any: return await self._check_threepid("email", authdict) diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py index bdc4a9c068..19c2da4244 100644 --- a/synapse/rest/client/account.py +++ b/synapse/rest/client/account.py @@ -28,7 +28,6 @@ from synapse.api.errors import ( SynapseError, ThreepidValidationError, ) -from synapse.config.emailconfig import ThreepidBehaviour from synapse.handlers.ui_auth import UIAuthSessionDataConstants from synapse.http.server import HttpServer, finish_request, respond_with_html from synapse.http.servlet import ( @@ -64,7 +63,7 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): self.config = hs.config self.identity_handler = hs.get_identity_handler() - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.can_verify_email: self.mailer = Mailer( hs=self.hs, app_name=self.config.email.email_app_name, @@ -73,11 +72,10 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): ) async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: - if self.config.email.local_threepid_handling_disabled_due_to_email_config: - logger.warning( - "User password resets have been disabled due to lack of email config" - ) + if not self.config.email.can_verify_email: + logger.warning( + "User password resets have been disabled due to lack of email config" + ) raise SynapseError( 400, "Email-based password resets have been disabled on this server" ) @@ -129,35 +127,21 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): raise SynapseError(400, "Email not found", Codes.THREEPID_NOT_FOUND) - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - assert self.hs.config.registration.account_threepid_delegate_email - - # Have the configured identity server handle the request - ret = await self.identity_handler.requestEmailToken( - self.hs.config.registration.account_threepid_delegate_email, - email, - client_secret, - send_attempt, - next_link, - ) - else: - # Send password reset emails from Synapse - sid = await self.identity_handler.send_threepid_validation( - email, - client_secret, - send_attempt, - self.mailer.send_password_reset_mail, - 
next_link, - ) - - # Wrap the session id in a JSON object - ret = {"sid": sid} + # Send password reset emails from Synapse + sid = await self.identity_handler.send_threepid_validation( + email, + client_secret, + send_attempt, + self.mailer.send_password_reset_mail, + next_link, + ) threepid_send_requests.labels(type="email", reason="password_reset").observe( send_attempt ) - return 200, ret + # Wrap the session id in a JSON object + return 200, {"sid": sid} class PasswordRestServlet(RestServlet): @@ -349,7 +333,7 @@ class EmailThreepidRequestTokenRestServlet(RestServlet): self.identity_handler = hs.get_identity_handler() self.store = self.hs.get_datastores().main - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.can_verify_email: self.mailer = Mailer( hs=self.hs, app_name=self.config.email.email_app_name, @@ -358,11 +342,10 @@ class EmailThreepidRequestTokenRestServlet(RestServlet): ) async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: - if self.config.email.local_threepid_handling_disabled_due_to_email_config: - logger.warning( - "Adding emails have been disabled due to lack of an email config" - ) + if not self.config.email.can_verify_email: + logger.warning( + "Adding emails have been disabled due to lack of an email config" + ) raise SynapseError( 400, "Adding an email to your account is disabled on this server" ) @@ -413,35 +396,20 @@ class EmailThreepidRequestTokenRestServlet(RestServlet): raise SynapseError(400, "Email is already in use", Codes.THREEPID_IN_USE) - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - assert self.hs.config.registration.account_threepid_delegate_email - - # Have the configured identity server handle the request - ret = await self.identity_handler.requestEmailToken( - self.hs.config.registration.account_threepid_delegate_email, - email, - client_secret, - send_attempt, - next_link, - ) - else: - # Send threepid validation emails from Synapse - sid = await self.identity_handler.send_threepid_validation( - email, - client_secret, - send_attempt, - self.mailer.send_add_threepid_mail, - next_link, - ) - - # Wrap the session id in a JSON object - ret = {"sid": sid} + sid = await self.identity_handler.send_threepid_validation( + email, + client_secret, + send_attempt, + self.mailer.send_add_threepid_mail, + next_link, + ) threepid_send_requests.labels(type="email", reason="add_threepid").observe( send_attempt ) - return 200, ret + # Wrap the session id in a JSON object + return 200, {"sid": sid} class MsisdnThreepidRequestTokenRestServlet(RestServlet): @@ -534,26 +502,19 @@ class AddThreepidEmailSubmitTokenServlet(RestServlet): self.config = hs.config self.clock = hs.get_clock() self.store = hs.get_datastores().main - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.can_verify_email: self._failure_email_template = ( self.config.email.email_add_threepid_template_failure_html ) async def on_GET(self, request: Request) -> None: - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: - if self.config.email.local_threepid_handling_disabled_due_to_email_config: - logger.warning( - "Adding emails have been disabled due to lack of an email config" - ) + if not self.config.email.can_verify_email: + logger.warning( + "Adding emails have been disabled due to lack of an email config" + ) raise SynapseError( 400, "Adding an email to your 
account is disabled on this server" ) - elif self.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - raise SynapseError( - 400, - "This homeserver is not validating threepids. Use an identity server " - "instead.", - ) sid = parse_string(request, "sid", required=True) token = parse_string(request, "token", required=True) diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py index e8e51a9c66..a8402cdb3a 100644 --- a/synapse/rest/client/register.py +++ b/synapse/rest/client/register.py @@ -31,7 +31,6 @@ from synapse.api.errors import ( ) from synapse.api.ratelimiting import Ratelimiter from synapse.config import ConfigError -from synapse.config.emailconfig import ThreepidBehaviour from synapse.config.homeserver import HomeServerConfig from synapse.config.ratelimiting import FederationRateLimitConfig from synapse.config.server import is_threepid_reserved @@ -74,7 +73,7 @@ class EmailRegisterRequestTokenRestServlet(RestServlet): self.identity_handler = hs.get_identity_handler() self.config = hs.config - if self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.hs.config.email.can_verify_email: self.mailer = Mailer( hs=self.hs, app_name=self.config.email.email_app_name, @@ -83,13 +82,10 @@ class EmailRegisterRequestTokenRestServlet(RestServlet): ) async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: - if self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: - if ( - self.hs.config.email.local_threepid_handling_disabled_due_to_email_config - ): - logger.warning( - "Email registration has been disabled due to lack of email config" - ) + if not self.hs.config.email.can_verify_email: + logger.warning( + "Email registration has been disabled due to lack of email config" + ) raise SynapseError( 400, "Email-based registration has been disabled on this server" ) @@ -138,35 +134,21 @@ class EmailRegisterRequestTokenRestServlet(RestServlet): raise SynapseError(400, "Email is already in use", Codes.THREEPID_IN_USE) - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: - assert self.hs.config.registration.account_threepid_delegate_email - - # Have the configured identity server handle the request - ret = await self.identity_handler.requestEmailToken( - self.hs.config.registration.account_threepid_delegate_email, - email, - client_secret, - send_attempt, - next_link, - ) - else: - # Send registration emails from Synapse - sid = await self.identity_handler.send_threepid_validation( - email, - client_secret, - send_attempt, - self.mailer.send_registration_mail, - next_link, - ) - - # Wrap the session id in a JSON object - ret = {"sid": sid} + # Send registration emails from Synapse + sid = await self.identity_handler.send_threepid_validation( + email, + client_secret, + send_attempt, + self.mailer.send_registration_mail, + next_link, + ) threepid_send_requests.labels(type="email", reason="register").observe( send_attempt ) - return 200, ret + # Wrap the session id in a JSON object + return 200, {"sid": sid} class MsisdnRegisterRequestTokenRestServlet(RestServlet): @@ -260,7 +242,7 @@ class RegistrationSubmitTokenServlet(RestServlet): self.clock = hs.get_clock() self.store = hs.get_datastores().main - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.can_verify_email: self._failure_email_template = ( self.config.email.email_registration_template_failure_html ) @@ -270,11 +252,10 @@ class RegistrationSubmitTokenServlet(RestServlet): 
raise SynapseError( 400, "This medium is currently not supported for registration" ) - if self.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: - if self.config.email.local_threepid_handling_disabled_due_to_email_config: - logger.warning( - "User registration via email has been disabled due to lack of email config" - ) + if not self.config.email.can_verify_email: + logger.warning( + "User registration via email has been disabled due to lack of email config" + ) raise SynapseError( 400, "Email-based registration is disabled on this server" ) diff --git a/synapse/rest/synapse/client/password_reset.py b/synapse/rest/synapse/client/password_reset.py index 6ac9dbc7c9..b9402cfb75 100644 --- a/synapse/rest/synapse/client/password_reset.py +++ b/synapse/rest/synapse/client/password_reset.py @@ -17,7 +17,6 @@ from typing import TYPE_CHECKING, Tuple from twisted.web.server import Request from synapse.api.errors import ThreepidValidationError -from synapse.config.emailconfig import ThreepidBehaviour from synapse.http.server import DirectServeHtmlResource from synapse.http.servlet import parse_string from synapse.util.stringutils import assert_valid_client_secret @@ -46,9 +45,6 @@ class PasswordResetSubmitTokenResource(DirectServeHtmlResource): self.clock = hs.get_clock() self.store = hs.get_datastores().main - self._local_threepid_handling_disabled_due_to_email_config = ( - hs.config.email.local_threepid_handling_disabled_due_to_email_config - ) self._confirmation_email_template = ( hs.config.email.email_password_reset_template_confirmation_html ) @@ -59,8 +55,8 @@ class PasswordResetSubmitTokenResource(DirectServeHtmlResource): hs.config.email.email_password_reset_template_failure_html ) - # This resource should not be mounted if threepid behaviour is not LOCAL - assert hs.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL + # This resource should only be mounted if email validation is enabled + assert hs.config.email.can_verify_email async def _async_render_GET(self, request: Request) -> Tuple[int, bytes]: sid = parse_string(request, "sid", required=True) diff --git a/tests/rest/client/test_register.py b/tests/rest/client/test_register.py index afb08b2736..cb27458746 100644 --- a/tests/rest/client/test_register.py +++ b/tests/rest/client/test_register.py @@ -592,9 +592,9 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): "require_at_registration": True, }, "account_threepid_delegates": { - "email": "https://id_server", "msisdn": "https://id_server", }, + "email": {"notif_from": "Synapse "}, } ) def test_advertised_flows_captcha_and_terms_and_3pids(self) -> None: From 52a0c8f2f7fc5a6dad02d3b6bdae90f3e58842c9 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 12 Jul 2022 19:46:32 +0100 Subject: [PATCH 108/178] Rename test case method to `add_hashes_and_signatures_from_other_server` (#13255) --- changelog.d/13255.misc | 1 + tests/federation/test_federation_client.py | 6 +++--- tests/federation/test_federation_server.py | 13 ++++--------- tests/handlers/test_federation.py | 2 +- tests/handlers/test_federation_event.py | 6 +++--- tests/test_visibility.py | 2 +- tests/unittest.py | 2 +- 7 files changed, 14 insertions(+), 18 deletions(-) create mode 100644 changelog.d/13255.misc diff --git a/changelog.d/13255.misc b/changelog.d/13255.misc new file mode 100644 index 0000000000..cba6b9ee0f --- /dev/null +++ b/changelog.d/13255.misc @@ -0,0 +1 @@ +Preparatory work for a per-room rate limiter on joins. 
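For context on the rename below: judging from the call sites visible in this diff, the helper is a thin wrapper that adds content hashes and a signature to an event dict using the signing key of the test's notional remote server, so the event appears to have originated there. A rough standalone equivalent, assuming a signedjson signing key for that remote server, might look like this (a sketch based on the call sites, not the actual implementation in `tests/unittest.py`):

```python
# Rough sketch of what the renamed test helper wraps, inferred from the call
# sites in this diff.
from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
from synapse.config.server import DEFAULT_ROOM_VERSION
from synapse.crypto.event_signing import add_hashes_and_signatures


def sign_as_other_server(event_dict, other_server_name, other_server_signing_key):
    """Add content hashes and the (fake) remote server's signature in place."""
    add_hashes_and_signatures(
        KNOWN_ROOM_VERSIONS[DEFAULT_ROOM_VERSION],
        event_dict,
        signature_name=other_server_name,
        signing_key=other_server_signing_key,
    )
    return event_dict
```

The new name makes explicit which server the signature is meant to come from, which is presumably why it was chosen as groundwork for the per-room join rate limiter mentioned in the changelog entry above.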
diff --git a/tests/federation/test_federation_client.py b/tests/federation/test_federation_client.py index 268a48d7ba..d2bda07198 100644 --- a/tests/federation/test_federation_client.py +++ b/tests/federation/test_federation_client.py @@ -45,7 +45,7 @@ class FederationClientTest(FederatingHomeserverTestCase): # mock up some events to use in the response. # In real life, these would have things in `prev_events` and `auth_events`, but that's # a bit annoying to mock up, and the code under test doesn't care, so we don't bother. - create_event_dict = self.add_hashes_and_signatures( + create_event_dict = self.add_hashes_and_signatures_from_other_server( { "room_id": test_room_id, "type": "m.room.create", @@ -57,7 +57,7 @@ class FederationClientTest(FederatingHomeserverTestCase): "origin_server_ts": 500, } ) - member_event_dict = self.add_hashes_and_signatures( + member_event_dict = self.add_hashes_and_signatures_from_other_server( { "room_id": test_room_id, "type": "m.room.member", @@ -69,7 +69,7 @@ class FederationClientTest(FederatingHomeserverTestCase): "origin_server_ts": 600, } ) - pl_event_dict = self.add_hashes_and_signatures( + pl_event_dict = self.add_hashes_and_signatures_from_other_server( { "room_id": test_room_id, "type": "m.room.power_levels", diff --git a/tests/federation/test_federation_server.py b/tests/federation/test_federation_server.py index 413b3c9426..fd15e680ed 100644 --- a/tests/federation/test_federation_server.py +++ b/tests/federation/test_federation_server.py @@ -20,7 +20,6 @@ from twisted.test.proto_helpers import MemoryReactor from synapse.api.room_versions import KNOWN_ROOM_VERSIONS from synapse.config.server import DEFAULT_ROOM_VERSION -from synapse.crypto.event_signing import add_hashes_and_signatures from synapse.events import make_event_from_dict from synapse.federation.federation_server import server_matches_acl_event from synapse.rest import admin @@ -163,11 +162,9 @@ class SendJoinFederationTests(unittest.FederatingHomeserverTestCase): join_result = self._make_join(joining_user) join_event_dict = join_result["event"] - add_hashes_and_signatures( - KNOWN_ROOM_VERSIONS[DEFAULT_ROOM_VERSION], + self.add_hashes_and_signatures_from_other_server( join_event_dict, - signature_name=self.OTHER_SERVER_NAME, - signing_key=self.OTHER_SERVER_SIGNATURE_KEY, + KNOWN_ROOM_VERSIONS[DEFAULT_ROOM_VERSION], ) channel = self.make_signed_federation_request( "PUT", @@ -220,11 +217,9 @@ class SendJoinFederationTests(unittest.FederatingHomeserverTestCase): join_result = self._make_join(joining_user) join_event_dict = join_result["event"] - add_hashes_and_signatures( - KNOWN_ROOM_VERSIONS[DEFAULT_ROOM_VERSION], + self.add_hashes_and_signatures_from_other_server( join_event_dict, - signature_name=self.OTHER_SERVER_NAME, - signing_key=self.OTHER_SERVER_SIGNATURE_KEY, + KNOWN_ROOM_VERSIONS[DEFAULT_ROOM_VERSION], ) channel = self.make_signed_federation_request( "PUT", diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index 9b9c11fab7..712933f9ca 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -256,7 +256,7 @@ class FederationTestCase(unittest.FederatingHomeserverTestCase): ] for _ in range(0, 8): event = make_event_from_dict( - self.add_hashes_and_signatures( + self.add_hashes_and_signatures_from_other_server( { "origin_server_ts": 1, "type": "m.room.message", diff --git a/tests/handlers/test_federation_event.py b/tests/handlers/test_federation_event.py index 4b1a8f04db..51c8dd6498 100644 --- 
a/tests/handlers/test_federation_event.py +++ b/tests/handlers/test_federation_event.py @@ -104,7 +104,7 @@ class FederationEventHandlerTests(unittest.FederatingHomeserverTestCase): # mock up a load of state events which we are missing state_events = [ make_event_from_dict( - self.add_hashes_and_signatures( + self.add_hashes_and_signatures_from_other_server( { "type": "test_state_type", "state_key": f"state_{i}", @@ -131,7 +131,7 @@ class FederationEventHandlerTests(unittest.FederatingHomeserverTestCase): # Depending on the test, we either persist this upfront (as an outlier), # or let the server request it. prev_event = make_event_from_dict( - self.add_hashes_and_signatures( + self.add_hashes_and_signatures_from_other_server( { "type": "test_regular_type", "room_id": room_id, @@ -165,7 +165,7 @@ class FederationEventHandlerTests(unittest.FederatingHomeserverTestCase): # mock up a regular event to pass into _process_pulled_event pulled_event = make_event_from_dict( - self.add_hashes_and_signatures( + self.add_hashes_and_signatures_from_other_server( { "type": "test_regular_type", "room_id": room_id, diff --git a/tests/test_visibility.py b/tests/test_visibility.py index f338af6c36..c385b2f8d4 100644 --- a/tests/test_visibility.py +++ b/tests/test_visibility.py @@ -272,7 +272,7 @@ class FilterEventsForClientTestCase(unittest.FederatingHomeserverTestCase): "state_key": "@user:test", "content": {"membership": "invite"}, } - self.add_hashes_and_signatures(invite_pdu) + self.add_hashes_and_signatures_from_other_server(invite_pdu) invite_event_id = make_event_from_dict(invite_pdu, RoomVersions.V9).event_id self.get_success( diff --git a/tests/unittest.py b/tests/unittest.py index c645dd3563..7b97a4bf6e 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -838,7 +838,7 @@ class FederatingHomeserverTestCase(HomeserverTestCase): client_ip=client_ip, ) - def add_hashes_and_signatures( + def add_hashes_and_signatures_from_other_server( self, event_dict: JsonDict, room_version: RoomVersion = KNOWN_ROOM_VERSIONS[DEFAULT_ROOM_VERSION], From 7218a0ca1871c881070a25e33bb1198f51ba1e3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacek=20Ku=C5=9Bnierz?= Date: Tue, 12 Jul 2022 20:48:29 +0200 Subject: [PATCH 109/178] Drop support for calling `/_matrix/client/v3/account/3pid/bind` without an `id_access_token` (#13239) Fixes #13201 Signed-off-by: Jacek Kusnierz jacek.kusnierz@tum.de --- changelog.d/13239.removal | 1 + synapse/handlers/identity.py | 30 ++++++------------------------ synapse/rest/client/account.py | 6 ++++-- 3 files changed, 11 insertions(+), 26 deletions(-) create mode 100644 changelog.d/13239.removal diff --git a/changelog.d/13239.removal b/changelog.d/13239.removal new file mode 100644 index 0000000000..8f6045176d --- /dev/null +++ b/changelog.d/13239.removal @@ -0,0 +1 @@ +Drop support for calling `/_matrix/client/v3/account/3pid/bind` without an `id_access_token`, which was not permitted by the spec. Contributed by @Vetchu. 
\ No newline at end of file diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index c70fdcc85e..164d891e90 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -162,8 +162,7 @@ class IdentityHandler: sid: str, mxid: str, id_server: str, - id_access_token: Optional[str] = None, - use_v2: bool = True, + id_access_token: str, ) -> JsonDict: """Bind a 3PID to an identity server @@ -173,8 +172,7 @@ class IdentityHandler: mxid: The MXID to bind the 3PID to id_server: The domain of the identity server to query id_access_token: The access token to authenticate to the identity - server with, if necessary. Required if use_v2 is true - use_v2: Whether to use v2 Identity Service API endpoints. Defaults to True + server with Raises: SynapseError: On any of the following conditions @@ -186,24 +184,15 @@ class IdentityHandler: """ logger.debug("Proxying threepid bind request for %s to %s", mxid, id_server) - # If an id_access_token is not supplied, force usage of v1 - if id_access_token is None: - use_v2 = False - if not valid_id_server_location(id_server): raise SynapseError( 400, "id_server must be a valid hostname with optional port and path components", ) - # Decide which API endpoint URLs to use - headers = {} bind_data = {"sid": sid, "client_secret": client_secret, "mxid": mxid} - if use_v2: - bind_url = "https://%s/_matrix/identity/v2/3pid/bind" % (id_server,) - headers["Authorization"] = create_id_access_token_header(id_access_token) # type: ignore - else: - bind_url = "https://%s/_matrix/identity/api/v1/3pid/bind" % (id_server,) + bind_url = "https://%s/_matrix/identity/v2/3pid/bind" % (id_server,) + headers = {"Authorization": create_id_access_token_header(id_access_token)} try: # Use the blacklisting http client as this call is only to identity servers @@ -222,21 +211,14 @@ class IdentityHandler: return data except HttpResponseException as e: - if e.code != 404 or not use_v2: - logger.error("3PID bind failed with Matrix error: %r", e) - raise e.to_synapse_error() + logger.error("3PID bind failed with Matrix error: %r", e) + raise e.to_synapse_error() except RequestTimedOutError: raise SynapseError(500, "Timed out contacting identity server") except CodeMessageException as e: data = json_decoder.decode(e.msg) # XXX WAT? 
return data - logger.info("Got 404 when POSTing JSON %s, falling back to v1 URL", bind_url) - res = await self.bind_threepid( - client_secret, sid, mxid, id_server, id_access_token, use_v2=False - ) - return res - async def try_unbind_threepid(self, mxid: str, threepid: dict) -> bool: """Attempt to remove a 3PID from an identity server, or if one is not provided, all identity servers we're aware the binding is present on diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py index 19c2da4244..0cc87a4001 100644 --- a/synapse/rest/client/account.py +++ b/synapse/rest/client/account.py @@ -704,10 +704,12 @@ class ThreepidBindRestServlet(RestServlet): async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: body = parse_json_object_from_request(request) - assert_params_in_dict(body, ["id_server", "sid", "client_secret"]) + assert_params_in_dict( + body, ["id_server", "sid", "id_access_token", "client_secret"] + ) id_server = body["id_server"] sid = body["sid"] - id_access_token = body.get("id_access_token") # optional + id_access_token = body["id_access_token"] client_secret = body["client_secret"] assert_valid_client_secret(client_secret) From a366b75b7241512b319d28ba95c65995e24de9ed Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 12 Jul 2022 19:52:06 +0100 Subject: [PATCH 110/178] Drop unused table `event_reference_hashes` (#13218) This is unused since Synapse 1.60.0 (#12679). It's time for it to go. --- changelog.d/13218.misc | 1 + .../delta/72/03drop_event_reference_hashes.sql | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 changelog.d/13218.misc create mode 100644 synapse/storage/schema/main/delta/72/03drop_event_reference_hashes.sql diff --git a/changelog.d/13218.misc b/changelog.d/13218.misc new file mode 100644 index 0000000000..b1c8e5c747 --- /dev/null +++ b/changelog.d/13218.misc @@ -0,0 +1 @@ +Remove unused database table `event_reference_hashes`. diff --git a/synapse/storage/schema/main/delta/72/03drop_event_reference_hashes.sql b/synapse/storage/schema/main/delta/72/03drop_event_reference_hashes.sql new file mode 100644 index 0000000000..0da668aa3a --- /dev/null +++ b/synapse/storage/schema/main/delta/72/03drop_event_reference_hashes.sql @@ -0,0 +1,17 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- event_reference_hashes is unused, so we can drop it +DROP TABLE event_reference_hashes; From 1381563988c6dc7a2b8801b736b1f0c663970da8 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 12 Jul 2022 15:01:58 -0400 Subject: [PATCH 111/178] Inline URL preview documentation. (#13261) Inline URL preview documentation near the implementation. 
--- changelog.d/13233.doc | 3 +- changelog.d/13261.doc | 1 + docs/SUMMARY.md | 1 - docs/admin_api/user_admin_api.md | 2 +- docs/development/url_previews.md | 62 ------------------- docs/media_repository.md | 5 +- synapse/rest/media/v1/preview_url_resource.py | 60 +++++++++++++++++- 7 files changed, 61 insertions(+), 73 deletions(-) create mode 100644 changelog.d/13261.doc delete mode 100644 docs/development/url_previews.md diff --git a/changelog.d/13233.doc b/changelog.d/13233.doc index b6babd7f15..3eaea7c5e3 100644 --- a/changelog.d/13233.doc +++ b/changelog.d/13233.doc @@ -1,2 +1 @@ -Add a link to configuration instructions in the URL preview documentation. - +Move the documentation for how URL previews work to the URL preview module. diff --git a/changelog.d/13261.doc b/changelog.d/13261.doc new file mode 100644 index 0000000000..3eaea7c5e3 --- /dev/null +++ b/changelog.d/13261.doc @@ -0,0 +1 @@ +Move the documentation for how URL previews work to the URL preview module. diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 8d6030e34a..f54b571d38 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -35,7 +35,6 @@ - [Application Services](application_services.md) - [Server Notices](server_notices.md) - [Consent Tracking](consent_tracking.md) - - [URL Previews](development/url_previews.md) - [User Directory](user_directory.md) - [Message Retention Policies](message_retention_policies.md) - [Pluggable Modules](modules/index.md) diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md index 1235f1cb95..0871cfebf5 100644 --- a/docs/admin_api/user_admin_api.md +++ b/docs/admin_api/user_admin_api.md @@ -544,7 +544,7 @@ Gets a list of all local media that a specific `user_id` has created. These are media that the user has uploaded themselves ([local media](../media_repository.md#local-media)), as well as [URL preview images](../media_repository.md#url-previews) requested by the user if the -[feature is enabled](../development/url_previews.md). +[feature is enabled](../usage/configuration/config_documentation.md#url_preview_enabled). By default, the response is ordered by descending creation date and ascending media ID. The newest media is on top. You can change the order with parameters diff --git a/docs/development/url_previews.md b/docs/development/url_previews.md deleted file mode 100644 index 25f189683d..0000000000 --- a/docs/development/url_previews.md +++ /dev/null @@ -1,62 +0,0 @@ -URL Previews -============ -For information on how to enable URL previews in synapse, please see the [config manual](../usage/configuration/config_documentation.md#url_preview_enabled). - -The `GET /_matrix/media/r0/preview_url` endpoint provides a generic preview API -for URLs which outputs [Open Graph](https://ogp.me/) responses (with some Matrix -specific additions). - -This does have trade-offs compared to other designs: - -* Pros: - * Simple and flexible; can be used by any clients at any point -* Cons: - * If each homeserver provides one of these independently, all the HSes in a - room may needlessly DoS the target URI - * The URL metadata must be stored somewhere, rather than just using Matrix - itself to store the media. - * Matrix cannot be used to distribute the metadata between homeservers. - -When Synapse is asked to preview a URL it does the following: - -1. Checks against a URL blacklist (defined as `url_preview_url_blacklist` in the - config). -2. Checks the in-memory cache by URLs and returns the result if it exists. 
(This - is also used to de-duplicate processing of multiple in-flight requests at once.) -3. Kicks off a background process to generate a preview: - 1. Checks the database cache by URL and timestamp and returns the result if it - has not expired and was successful (a 2xx return code). - 2. Checks if the URL matches an [oEmbed](https://oembed.com/) pattern. If it - does, update the URL to download. - 3. Downloads the URL and stores it into a file via the media storage provider - and saves the local media metadata. - 4. If the media is an image: - 1. Generates thumbnails. - 2. Generates an Open Graph response based on image properties. - 5. If the media is HTML: - 1. Decodes the HTML via the stored file. - 2. Generates an Open Graph response from the HTML. - 3. If a JSON oEmbed URL was found in the HTML via autodiscovery: - 1. Downloads the URL and stores it into a file via the media storage provider - and saves the local media metadata. - 2. Convert the oEmbed response to an Open Graph response. - 3. Override any Open Graph data from the HTML with data from oEmbed. - 4. If an image exists in the Open Graph response: - 1. Downloads the URL and stores it into a file via the media storage - provider and saves the local media metadata. - 2. Generates thumbnails. - 3. Updates the Open Graph response based on image properties. - 6. If the media is JSON and an oEmbed URL was found: - 1. Convert the oEmbed response to an Open Graph response. - 2. If a thumbnail or image is in the oEmbed response: - 1. Downloads the URL and stores it into a file via the media storage - provider and saves the local media metadata. - 2. Generates thumbnails. - 3. Updates the Open Graph response based on image properties. - 7. Stores the result in the database cache. -4. Returns the result. - -The in-memory cache expires after 1 hour. - -Expired entries in the database cache (and their associated media files) are -deleted every 10 seconds. The default expiration time is 1 hour from download. diff --git a/docs/media_repository.md b/docs/media_repository.md index ba17f8a856..23e6da7f31 100644 --- a/docs/media_repository.md +++ b/docs/media_repository.md @@ -7,8 +7,7 @@ The media repository users. * caches avatars, attachments and their thumbnails for media uploaded by remote users. - * caches resources and thumbnails used for - [URL previews](development/url_previews.md). + * caches resources and thumbnails used for URL previews. All media in Matrix can be identified by a unique [MXC URI](https://spec.matrix.org/latest/client-server-api/#matrix-content-mxc-uris), @@ -59,8 +58,6 @@ remote_thumbnail/matrix.org/aa/bb/cccccccccccccccccccc/128-96-image-jpeg Note that `remote_thumbnail/` does not have an `s`. ## URL Previews -See [URL Previews](development/url_previews.md) for documentation on the URL preview -process. When generating previews for URLs, Synapse may download and cache various resources, including images. These resources are assigned temporary media IDs diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 54a849eac9..b36c98a08e 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -109,10 +109,64 @@ class MediaInfo: class PreviewUrlResource(DirectServeJsonResource): """ - Generating URL previews is a complicated task which many potential pitfalls. 
+ The `GET /_matrix/media/r0/preview_url` endpoint provides a generic preview API + for URLs which outputs Open Graph (https://ogp.me/) responses (with some Matrix + specific additions). - See docs/development/url_previews.md for discussion of the design and - algorithm followed in this module. + This does have trade-offs compared to other designs: + + * Pros: + * Simple and flexible; can be used by any clients at any point + * Cons: + * If each homeserver provides one of these independently, all the homeservers in a + room may needlessly DoS the target URI + * The URL metadata must be stored somewhere, rather than just using Matrix + itself to store the media. + * Matrix cannot be used to distribute the metadata between homeservers. + + When Synapse is asked to preview a URL it does the following: + + 1. Checks against a URL blacklist (defined as `url_preview_url_blacklist` in the + config). + 2. Checks the URL against an in-memory cache and returns the result if it exists. (This + is also used to de-duplicate processing of multiple in-flight requests at once.) + 3. Kicks off a background process to generate a preview: + 1. Checks URL and timestamp against the database cache and returns the result if it + has not expired and was successful (a 2xx return code). + 2. Checks if the URL matches an oEmbed (https://oembed.com/) pattern. If it + does, update the URL to download. + 3. Downloads the URL and stores it into a file via the media storage provider + and saves the local media metadata. + 4. If the media is an image: + 1. Generates thumbnails. + 2. Generates an Open Graph response based on image properties. + 5. If the media is HTML: + 1. Decodes the HTML via the stored file. + 2. Generates an Open Graph response from the HTML. + 3. If a JSON oEmbed URL was found in the HTML via autodiscovery: + 1. Downloads the URL and stores it into a file via the media storage provider + and saves the local media metadata. + 2. Convert the oEmbed response to an Open Graph response. + 3. Override any Open Graph data from the HTML with data from oEmbed. + 4. If an image exists in the Open Graph response: + 1. Downloads the URL and stores it into a file via the media storage + provider and saves the local media metadata. + 2. Generates thumbnails. + 3. Updates the Open Graph response based on image properties. + 6. If the media is JSON and an oEmbed URL was found: + 1. Convert the oEmbed response to an Open Graph response. + 2. If a thumbnail or image is in the oEmbed response: + 1. Downloads the URL and stores it into a file via the media storage + provider and saves the local media metadata. + 2. Generates thumbnails. + 3. Updates the Open Graph response based on image properties. + 7. Stores the result in the database cache. + 4. Returns the result. + + The in-memory cache expires after 1 hour. + + Expired entries in the database cache (and their associated media files) are + deleted every 10 seconds. The default expiration time is 1 hour from download. 
""" isLeaf = True From 0312ff44c69ba16371206fc42b9f886b24253bcd Mon Sep 17 00:00:00 2001 From: Thomas Weston Date: Wed, 13 Jul 2022 11:33:21 +0100 Subject: [PATCH 112/178] Fix "add user" admin api error when request contains a "msisdn" threepid (#13263) Co-authored-by: Thomas Weston Co-authored-by: David Robertson --- changelog.d/13263.bugfix | 1 + synapse/rest/admin/users.py | 1 + tests/rest/admin/test_user.py | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+) create mode 100644 changelog.d/13263.bugfix diff --git a/changelog.d/13263.bugfix b/changelog.d/13263.bugfix new file mode 100644 index 0000000000..91e1d1e7eb --- /dev/null +++ b/changelog.d/13263.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.15.0 where adding a user through the Synapse Admin API with a phone number would fail if the "enable_email_notifs" and "email_notifs_for_new_users" options were enabled. Contributed by @thomasweston12. diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index f0614a2897..ba2f7fa6d8 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -373,6 +373,7 @@ class UserRestServletV2(RestServlet): if ( self.hs.config.email.email_enable_notifs and self.hs.config.email.email_notif_for_new_users + and medium == "email" ): await self.pusher_pool.add_pusher( user_id=user_id, diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index e32aaadb98..97693cd1e2 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -1636,6 +1636,41 @@ class UserRestTestCase(unittest.HomeserverTestCase): ) self.assertEqual(len(pushers), 0) + @override_config( + { + "email": { + "enable_notifs": True, + "notif_for_new_users": True, + "notif_from": "test@example.com", + }, + "public_baseurl": "https://example.com", + } + ) + def test_create_user_email_notif_for_new_users_with_msisdn_threepid(self) -> None: + """ + Check that a new regular user is created successfully when they have a msisdn + threepid and email notif_for_new_users is set to True. + """ + url = self.url_prefix % "@bob:test" + + # Create user + body = { + "password": "abc123", + "threepids": [{"medium": "msisdn", "address": "1234567890"}], + } + + channel = self.make_request( + "PUT", + url, + access_token=self.admin_user_tok, + content=body, + ) + + self.assertEqual(201, channel.code, msg=channel.json_body) + self.assertEqual("@bob:test", channel.json_body["name"]) + self.assertEqual("msisdn", channel.json_body["threepids"][0]["medium"]) + self.assertEqual("1234567890", channel.json_body["threepids"][0]["address"]) + def test_set_password(self) -> None: """ Test setting a new password for another user. From 90e9b4fa1e51a271e3cf46d38efd5d93c0c99857 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 13 Jul 2022 08:30:42 -0400 Subject: [PATCH 113/178] Do not fail build if complement with workers fails. 
(#13266) --- .github/workflows/tests.yml | 27 ++++++++++++++++++++++++--- changelog.d/13266.misc | 1 + 2 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 changelog.d/13266.misc diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 4bc29c8207..c8b033e8a4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -328,9 +328,6 @@ jobs: - arrangement: monolith database: Postgres - - arrangement: workers - database: Postgres - steps: - name: Run actions/checkout@v2 for synapse uses: actions/checkout@v2 @@ -346,6 +343,30 @@ jobs: shell: bash name: Run Complement Tests + # XXX When complement with workers is stable, move this back into the standard + # "complement" matrix above. + # + # See https://github.com/matrix-org/synapse/issues/13161 + complement-workers: + if: "${{ !failure() && !cancelled() }}" + needs: linting-done + runs-on: ubuntu-latest + + steps: + - name: Run actions/checkout@v2 for synapse + uses: actions/checkout@v2 + with: + path: synapse + + - name: Prepare Complement's Prerequisites + run: synapse/.ci/scripts/setup_complement_prerequisites.sh + + - run: | + set -o pipefail + POSTGRES=1 WORKERS=1 COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | gotestfmt + shell: bash + name: Run Complement Tests + # a job which marks all the other jobs as complete, thus allowing PRs to be merged. tests-done: if: ${{ always() }} diff --git a/changelog.d/13266.misc b/changelog.d/13266.misc new file mode 100644 index 0000000000..d583acb81b --- /dev/null +++ b/changelog.d/13266.misc @@ -0,0 +1 @@ +Do not fail build if complement with workers fails. From 4db7862e0fce1a7b1a84458d572ba006b032c737 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 13 Jul 2022 09:55:14 -0400 Subject: [PATCH 114/178] Drop unused tables from groups/communities. (#12967) These tables have been unused since Synapse v1.61.0, although schema version 72 was added in Synapse v1.62.0. --- changelog.d/12967.removal | 1 + synapse/_scripts/synapse_port_db.py | 16 ---------- synapse/storage/schema/__init__.py | 7 +++-- .../schema/main/delta/72/03remove_groups.sql | 31 +++++++++++++++++++ 4 files changed, 36 insertions(+), 19 deletions(-) create mode 100644 changelog.d/12967.removal create mode 100644 synapse/storage/schema/main/delta/72/03remove_groups.sql diff --git a/changelog.d/12967.removal b/changelog.d/12967.removal new file mode 100644 index 0000000000..0aafd6a4d9 --- /dev/null +++ b/changelog.d/12967.removal @@ -0,0 +1 @@ +Drop tables used for groups/communities. diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 26834a437e..543bba27c2 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -166,22 +166,6 @@ IGNORED_TABLES = { "ui_auth_sessions", "ui_auth_sessions_credentials", "ui_auth_sessions_ips", - # Groups/communities is no longer supported. 
- "group_attestations_remote", - "group_attestations_renewals", - "group_invites", - "group_roles", - "group_room_categories", - "group_rooms", - "group_summary_roles", - "group_summary_room_categories", - "group_summary_rooms", - "group_summary_users", - "group_users", - "groups", - "local_group_membership", - "local_group_updates", - "remote_profile_cache", } diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index dc237e3032..a9a88c8bfd 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -74,13 +74,14 @@ Changes in SCHEMA_VERSION = 71: Changes in SCHEMA_VERSION = 72: - event_edges.(room_id, is_state) are no longer written to. + - Tables related to groups are dropped. """ SCHEMA_COMPAT_VERSION = ( - # We no longer maintain `event_edges.room_id`, so synapses with SCHEMA_VERSION < 71 - # will break. - 71 + # The groups tables are no longer accessible, so synapses with SCHEMA_VERSION < 72 + # could break. + 72 ) """Limit on how far the synapse codebase can be rolled back without breaking db compat diff --git a/synapse/storage/schema/main/delta/72/03remove_groups.sql b/synapse/storage/schema/main/delta/72/03remove_groups.sql new file mode 100644 index 0000000000..b7c5894de8 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/03remove_groups.sql @@ -0,0 +1,31 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Remove the tables which powered the unspecced groups/communities feature. +DROP TABLE IF EXISTS group_attestations_remote; +DROP TABLE IF EXISTS group_attestations_renewals; +DROP TABLE IF EXISTS group_invites; +DROP TABLE IF EXISTS group_roles; +DROP TABLE IF EXISTS group_room_categories; +DROP TABLE IF EXISTS group_rooms; +DROP TABLE IF EXISTS group_summary_roles; +DROP TABLE IF EXISTS group_summary_room_categories; +DROP TABLE IF EXISTS group_summary_rooms; +DROP TABLE IF EXISTS group_summary_users; +DROP TABLE IF EXISTS group_users; +DROP TABLE IF EXISTS groups; +DROP TABLE IF EXISTS local_group_membership; +DROP TABLE IF EXISTS local_group_updates; +DROP TABLE IF EXISTS remote_profile_cache; From 3371e1abcb607776c6351ab5d73ba38f1db8a8f2 Mon Sep 17 00:00:00 2001 From: Brad Murray Date: Wed, 13 Jul 2022 10:18:20 -0400 Subject: [PATCH 115/178] Add prometheus counters for content types other than events (#13175) --- changelog.d/13175.misc | 1 + synapse/appservice/api.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 changelog.d/13175.misc diff --git a/changelog.d/13175.misc b/changelog.d/13175.misc new file mode 100644 index 0000000000..f273b3d6ca --- /dev/null +++ b/changelog.d/13175.misc @@ -0,0 +1 @@ +Add prometheus counters for ephemeral events and to device messages pushed to app services. Contributed by Brad @ Beeper. 
diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py index df1c214462..0963fb3bb4 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py @@ -53,6 +53,18 @@ sent_events_counter = Counter( "synapse_appservice_api_sent_events", "Number of events sent to the AS", ["service"] ) +sent_ephemeral_counter = Counter( + "synapse_appservice_api_sent_ephemeral", + "Number of ephemeral events sent to the AS", + ["service"], +) + +sent_todevice_counter = Counter( + "synapse_appservice_api_sent_todevice", + "Number of todevice messages sent to the AS", + ["service"], +) + HOUR_IN_MS = 60 * 60 * 1000 @@ -310,6 +322,8 @@ class ApplicationServiceApi(SimpleHttpClient): ) sent_transactions_counter.labels(service.id).inc() sent_events_counter.labels(service.id).inc(len(serialized_events)) + sent_ephemeral_counter.labels(service.id).inc(len(ephemeral)) + sent_todevice_counter.labels(service.id).inc(len(to_device_messages)) return True except CodeMessageException as e: logger.warning( From 1d5c80b16188c587427d663c3bec57e9c196dd1b Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 13 Jul 2022 13:23:16 -0400 Subject: [PATCH 116/178] Reduce duplicate code in receipts servlets. (#13198) --- changelog.d/13198.misc | 1 + synapse/rest/client/read_marker.py | 54 +++++++++++++----------------- synapse/rest/client/receipts.py | 20 +++++------ 3 files changed, 32 insertions(+), 43 deletions(-) create mode 100644 changelog.d/13198.misc diff --git a/changelog.d/13198.misc b/changelog.d/13198.misc new file mode 100644 index 0000000000..5aef2432df --- /dev/null +++ b/changelog.d/13198.misc @@ -0,0 +1 @@ +Refactor receipts servlet logic to avoid duplicated code. diff --git a/synapse/rest/client/read_marker.py b/synapse/rest/client/read_marker.py index 3644705e6a..8896f2df50 100644 --- a/synapse/rest/client/read_marker.py +++ b/synapse/rest/client/read_marker.py @@ -40,6 +40,10 @@ class ReadMarkerRestServlet(RestServlet): self.read_marker_handler = hs.get_read_marker_handler() self.presence_handler = hs.get_presence_handler() + self._known_receipt_types = {ReceiptTypes.READ, ReceiptTypes.FULLY_READ} + if hs.config.experimental.msc2285_enabled: + self._known_receipt_types.add(ReceiptTypes.READ_PRIVATE) + async def on_POST( self, request: SynapseRequest, room_id: str ) -> Tuple[int, JsonDict]: @@ -49,13 +53,7 @@ class ReadMarkerRestServlet(RestServlet): body = parse_json_object_from_request(request) - valid_receipt_types = { - ReceiptTypes.READ, - ReceiptTypes.FULLY_READ, - ReceiptTypes.READ_PRIVATE, - } - - unrecognized_types = set(body.keys()) - valid_receipt_types + unrecognized_types = set(body.keys()) - self._known_receipt_types if unrecognized_types: # It's fine if there are unrecognized receipt types, but let's log # it to help debug clients that have typoed the receipt type. @@ -65,31 +63,25 @@ class ReadMarkerRestServlet(RestServlet): # types. logger.info("Ignoring unrecognized receipt types: %s", unrecognized_types) - read_event_id = body.get(ReceiptTypes.READ, None) - if read_event_id: - await self.receipts_handler.received_client_receipt( - room_id, - ReceiptTypes.READ, - user_id=requester.user.to_string(), - event_id=read_event_id, - ) + for receipt_type in self._known_receipt_types: + event_id = body.get(receipt_type, None) + # TODO Add validation to reject non-string event IDs. 
+ if not event_id: + continue - read_private_event_id = body.get(ReceiptTypes.READ_PRIVATE, None) - if read_private_event_id and self.config.experimental.msc2285_enabled: - await self.receipts_handler.received_client_receipt( - room_id, - ReceiptTypes.READ_PRIVATE, - user_id=requester.user.to_string(), - event_id=read_private_event_id, - ) - - read_marker_event_id = body.get(ReceiptTypes.FULLY_READ, None) - if read_marker_event_id: - await self.read_marker_handler.received_client_read_marker( - room_id, - user_id=requester.user.to_string(), - event_id=read_marker_event_id, - ) + if receipt_type == ReceiptTypes.FULLY_READ: + await self.read_marker_handler.received_client_read_marker( + room_id, + user_id=requester.user.to_string(), + event_id=event_id, + ) + else: + await self.receipts_handler.received_client_receipt( + room_id, + receipt_type, + user_id=requester.user.to_string(), + event_id=event_id, + ) return 200, {} diff --git a/synapse/rest/client/receipts.py b/synapse/rest/client/receipts.py index 4b03eb876b..409bfd43c1 100644 --- a/synapse/rest/client/receipts.py +++ b/synapse/rest/client/receipts.py @@ -39,31 +39,27 @@ class ReceiptRestServlet(RestServlet): def __init__(self, hs: "HomeServer"): super().__init__() - self.hs = hs self.auth = hs.get_auth() self.receipts_handler = hs.get_receipts_handler() self.read_marker_handler = hs.get_read_marker_handler() self.presence_handler = hs.get_presence_handler() + self._known_receipt_types = {ReceiptTypes.READ} + if hs.config.experimental.msc2285_enabled: + self._known_receipt_types.update( + (ReceiptTypes.READ_PRIVATE, ReceiptTypes.FULLY_READ) + ) + async def on_POST( self, request: SynapseRequest, room_id: str, receipt_type: str, event_id: str ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) - if self.hs.config.experimental.msc2285_enabled and receipt_type not in [ - ReceiptTypes.READ, - ReceiptTypes.READ_PRIVATE, - ReceiptTypes.FULLY_READ, - ]: + if receipt_type not in self._known_receipt_types: raise SynapseError( 400, - "Receipt type must be 'm.read', 'org.matrix.msc2285.read.private' or 'm.fully_read'", + f"Receipt type must be {', '.join(self._known_receipt_types)}", ) - elif ( - not self.hs.config.experimental.msc2285_enabled - and receipt_type != ReceiptTypes.READ - ): - raise SynapseError(400, "Receipt type must be 'm.read'") parse_json_object_from_request(request, allow_empty_body=False) From 982fe2965515e4536a0aa0153fa6bee238179f51 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Wed, 13 Jul 2022 20:32:46 +0200 Subject: [PATCH 117/178] Optimise room creation event lookups part 2 (#13224) --- changelog.d/13224.misc | 1 + synapse/handlers/room.py | 45 +++++++++++++++++++++++++++------ synapse/handlers/room_member.py | 43 ++++++++++++++++++++++++++----- tests/rest/client/test_rooms.py | 8 +++--- 4 files changed, 78 insertions(+), 19 deletions(-) create mode 100644 changelog.d/13224.misc diff --git a/changelog.d/13224.misc b/changelog.d/13224.misc new file mode 100644 index 0000000000..41f8693b74 --- /dev/null +++ b/changelog.d/13224.misc @@ -0,0 +1 @@ +Further reduce queries used sending events when creating new rooms. Contributed by Nick @ Beeper (@fizzadar). 
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index a54f163c0a..978d3ee39f 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -889,7 +889,11 @@ class RoomCreationHandler: # override any attempt to set room versions via the creation_content creation_content["room_version"] = room_version.identifier - last_stream_id = await self._send_events_for_new_room( + ( + last_stream_id, + last_sent_event_id, + depth, + ) = await self._send_events_for_new_room( requester, room_id, preset_config=preset_config, @@ -905,7 +909,7 @@ class RoomCreationHandler: if "name" in config: name = config["name"] ( - _, + name_event, last_stream_id, ) = await self.event_creation_handler.create_and_send_nonmember_event( requester, @@ -917,12 +921,16 @@ class RoomCreationHandler: "content": {"name": name}, }, ratelimit=False, + prev_event_ids=[last_sent_event_id], + depth=depth, ) + last_sent_event_id = name_event.event_id + depth += 1 if "topic" in config: topic = config["topic"] ( - _, + topic_event, last_stream_id, ) = await self.event_creation_handler.create_and_send_nonmember_event( requester, @@ -934,7 +942,11 @@ class RoomCreationHandler: "content": {"topic": topic}, }, ratelimit=False, + prev_event_ids=[last_sent_event_id], + depth=depth, ) + last_sent_event_id = topic_event.event_id + depth += 1 # we avoid dropping the lock between invites, as otherwise joins can # start coming in and making the createRoom slow. @@ -949,7 +961,7 @@ class RoomCreationHandler: for invitee in invite_list: ( - _, + member_event_id, last_stream_id, ) = await self.room_member_handler.update_membership_locked( requester, @@ -959,7 +971,11 @@ class RoomCreationHandler: ratelimit=False, content=content, new_room=True, + prev_event_ids=[last_sent_event_id], + depth=depth, ) + last_sent_event_id = member_event_id + depth += 1 for invite_3pid in invite_3pid_list: id_server = invite_3pid["id_server"] @@ -968,7 +984,10 @@ class RoomCreationHandler: medium = invite_3pid["medium"] # Note that do_3pid_invite can raise a ShadowBanError, but this was # handled above by emptying invite_3pid_list. - last_stream_id = await self.hs.get_room_member_handler().do_3pid_invite( + ( + member_event_id, + last_stream_id, + ) = await self.hs.get_room_member_handler().do_3pid_invite( room_id, requester.user, medium, @@ -977,7 +996,11 @@ class RoomCreationHandler: requester, txn_id=None, id_access_token=id_access_token, + prev_event_ids=[last_sent_event_id], + depth=depth, ) + last_sent_event_id = member_event_id + depth += 1 result = {"room_id": room_id} @@ -1005,20 +1028,22 @@ class RoomCreationHandler: power_level_content_override: Optional[JsonDict] = None, creator_join_profile: Optional[JsonDict] = None, ratelimit: bool = True, - ) -> int: + ) -> Tuple[int, str, int]: """Sends the initial events into a new room. `power_level_content_override` doesn't apply when initial state has power level state event content. Returns: - The stream_id of the last event persisted. + A tuple containing the stream ID, event ID and depth of the last + event sent to the room. 
""" creator_id = creator.user.to_string() event_keys = {"room_id": room_id, "sender": creator_id, "state_key": ""} + depth = 1 last_sent_event_id: Optional[str] = None def create(etype: str, content: JsonDict, **kwargs: Any) -> JsonDict: @@ -1031,6 +1056,7 @@ class RoomCreationHandler: async def send(etype: str, content: JsonDict, **kwargs: Any) -> int: nonlocal last_sent_event_id + nonlocal depth event = create(etype, content, **kwargs) logger.debug("Sending %s in new room", etype) @@ -1047,9 +1073,11 @@ class RoomCreationHandler: # Note: we don't pass state_event_ids here because this triggers # an additional query per event to look them up from the events table. prev_event_ids=[last_sent_event_id] if last_sent_event_id else [], + depth=depth, ) last_sent_event_id = sent_event.event_id + depth += 1 return last_stream_id @@ -1075,6 +1103,7 @@ class RoomCreationHandler: content=creator_join_profile, new_room=True, prev_event_ids=[last_sent_event_id], + depth=depth, ) last_sent_event_id = member_event_id @@ -1168,7 +1197,7 @@ class RoomCreationHandler: content={"algorithm": RoomEncryptionAlgorithms.DEFAULT}, ) - return last_sent_stream_id + return last_sent_stream_id, last_sent_event_id, depth def _generate_room_id(self) -> str: """Generates a random room ID. diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 04c44b2ccb..90e0b21600 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -285,6 +285,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events: bool = False, prev_event_ids: Optional[List[str]] = None, state_event_ids: Optional[List[str]] = None, + depth: Optional[int] = None, txn_id: Optional[str] = None, ratelimit: bool = True, content: Optional[dict] = None, @@ -315,6 +316,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): prev_events are set so we need to set them ourself via this argument. This should normally be left as None, which will cause the auth_event_ids to be calculated based on the room state at the prev_events. + depth: Override the depth used to order the event in the DAG. + Should normally be set to None, which will cause the depth to be calculated + based on the prev_events. txn_id: ratelimit: @@ -370,6 +374,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events=allow_no_prev_events, prev_event_ids=prev_event_ids, state_event_ids=state_event_ids, + depth=depth, require_consent=require_consent, outlier=outlier, historical=historical, @@ -466,6 +471,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events: bool = False, prev_event_ids: Optional[List[str]] = None, state_event_ids: Optional[List[str]] = None, + depth: Optional[int] = None, ) -> Tuple[str, int]: """Update a user's membership in a room. @@ -501,6 +507,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): prev_events are set so we need to set them ourself via this argument. This should normally be left as None, which will cause the auth_event_ids to be calculated based on the room state at the prev_events. + depth: Override the depth used to order the event in the DAG. + Should normally be set to None, which will cause the depth to be calculated + based on the prev_events. Returns: A tuple of the new event ID and stream ID. 
@@ -540,6 +549,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events=allow_no_prev_events, prev_event_ids=prev_event_ids, state_event_ids=state_event_ids, + depth=depth, ) return result @@ -562,6 +572,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events: bool = False, prev_event_ids: Optional[List[str]] = None, state_event_ids: Optional[List[str]] = None, + depth: Optional[int] = None, ) -> Tuple[str, int]: """Helper for update_membership. @@ -599,6 +610,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): prev_events are set so we need to set them ourself via this argument. This should normally be left as None, which will cause the auth_event_ids to be calculated based on the room state at the prev_events. + depth: Override the depth used to order the event in the DAG. + Should normally be set to None, which will cause the depth to be calculated + based on the prev_events. Returns: A tuple of the new event ID and stream ID. @@ -732,6 +746,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events=allow_no_prev_events, prev_event_ids=prev_event_ids, state_event_ids=state_event_ids, + depth=depth, content=content, require_consent=require_consent, outlier=outlier, @@ -967,6 +982,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): ratelimit=ratelimit, prev_event_ids=latest_event_ids, state_event_ids=state_event_ids, + depth=depth, content=content, require_consent=require_consent, outlier=outlier, @@ -1322,7 +1338,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): requester: Requester, txn_id: Optional[str], id_access_token: Optional[str] = None, - ) -> int: + prev_event_ids: Optional[List[str]] = None, + depth: Optional[int] = None, + ) -> Tuple[str, int]: """Invite a 3PID to a room. Args: @@ -1335,9 +1353,13 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): txn_id: The transaction ID this is part of, or None if this is not part of a transaction. id_access_token: The optional identity server access token. + depth: Override the depth used to order the event in the DAG. + prev_event_ids: The event IDs to use as the prev events + Should normally be set to None, which will cause the depth to be calculated + based on the prev_events. Returns: - The new stream ID. + Tuple of event ID and stream ordering position Raises: ShadowBanError if the requester has been shadow-banned. @@ -1383,7 +1405,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): # We don't check the invite against the spamchecker(s) here (through # user_may_invite) because we'll do it further down the line anyway (in # update_membership_locked). 
- _, stream_id = await self.update_membership( + event_id, stream_id = await self.update_membership( requester, UserID.from_string(invitee), room_id, "invite", txn_id=txn_id ) else: @@ -1402,7 +1424,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): additional_fields=spam_check[1], ) - stream_id = await self._make_and_store_3pid_invite( + event, stream_id = await self._make_and_store_3pid_invite( requester, id_server, medium, @@ -1411,9 +1433,12 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): inviter, txn_id=txn_id, id_access_token=id_access_token, + prev_event_ids=prev_event_ids, + depth=depth, ) + event_id = event.event_id - return stream_id + return event_id, stream_id async def _make_and_store_3pid_invite( self, @@ -1425,7 +1450,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): user: UserID, txn_id: Optional[str], id_access_token: Optional[str] = None, - ) -> int: + prev_event_ids: Optional[List[str]] = None, + depth: Optional[int] = None, + ) -> Tuple[EventBase, int]: room_state = await self._storage_controllers.state.get_current_state( room_id, StateFilter.from_types( @@ -1518,8 +1545,10 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): }, ratelimit=False, txn_id=txn_id, + prev_event_ids=prev_event_ids, + depth=depth, ) - return stream_id + return event, stream_id async def _is_host_in_room(self, current_state_ids: StateMap[str]) -> bool: # Have we just created the room, and is this about to be the very diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index df7ffbe545..8ed5272b16 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -709,7 +709,7 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(200, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(37, channel.resource_usage.db_txn_count) + self.assertEqual(32, channel.resource_usage.db_txn_count) def test_post_room_initial_state(self) -> None: # POST with initial_state config key, expect new room id @@ -722,7 +722,7 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(200, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(41, channel.resource_usage.db_txn_count) + self.assertEqual(35, channel.resource_usage.db_txn_count) def test_post_room_visibility_key(self) -> None: # POST with visibility config key, expect new room id @@ -3283,7 +3283,7 @@ class ThreepidInviteTestCase(unittest.HomeserverTestCase): # Mock a few functions to prevent the test from failing due to failing to talk to # a remote IS. We keep the mock for make_and_store_3pid_invite around so we # can check its call_count later on during the test. - make_invite_mock = Mock(return_value=make_awaitable(0)) + make_invite_mock = Mock(return_value=make_awaitable((Mock(event_id="abc"), 0))) self.hs.get_room_member_handler()._make_and_store_3pid_invite = make_invite_mock self.hs.get_identity_handler().lookup_3pid = Mock( return_value=make_awaitable(None), @@ -3344,7 +3344,7 @@ class ThreepidInviteTestCase(unittest.HomeserverTestCase): # Mock a few functions to prevent the test from failing due to failing to talk to # a remote IS. We keep the mock for make_and_store_3pid_invite around so we # can check its call_count later on during the test. 
- make_invite_mock = Mock(return_value=make_awaitable(0)) + make_invite_mock = Mock(return_value=make_awaitable((Mock(event_id="abc"), 0))) self.hs.get_room_member_handler()._make_and_store_3pid_invite = make_invite_mock self.hs.get_identity_handler().lookup_3pid = Mock( return_value=make_awaitable(None), From 2341032cf2d031e58710d82c9ee1d2360f9b82f9 Mon Sep 17 00:00:00 2001 From: jejo86 <28619134+jejo86@users.noreply.github.com> Date: Wed, 13 Jul 2022 20:33:33 +0200 Subject: [PATCH 118/178] Document advising against publicly exposing the Admin API and provide a usage example (#13231) * Admin API request explanation improved Pointed out, that the Admin API is not accessible by default from any remote computer, but only from the PC `matrix-synapse` is running on. Added a full, working example, making sure to include the cURL flag `-X`, which needs to be prepended to `GET`, `POST`, `PUT` etc. and listing the full query string including protocol, IP address and port. * Admin API request explanation improved * Apply suggestions from code review Update changelog. Reword prose. Co-authored-by: David Robertson --- changelog.d/13231.doc | 1 + docs/usage/administration/admin_api/README.md | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 changelog.d/13231.doc diff --git a/changelog.d/13231.doc b/changelog.d/13231.doc new file mode 100644 index 0000000000..e750f9da49 --- /dev/null +++ b/changelog.d/13231.doc @@ -0,0 +1 @@ +Provide an example of using the Admin API. Contributed by @jejo86. diff --git a/docs/usage/administration/admin_api/README.md b/docs/usage/administration/admin_api/README.md index 3cbedc5dfa..c60b6da0de 100644 --- a/docs/usage/administration/admin_api/README.md +++ b/docs/usage/administration/admin_api/README.md @@ -18,6 +18,11 @@ already on your `$PATH` depending on how Synapse was installed. Finding your user's `access_token` is client-dependent, but will usually be shown in the client's settings. ## Making an Admin API request +For security reasons, we [recommend](reverse_proxy.md#synapse-administration-endpoints) +that the Admin API (`/_synapse/admin/...`) should be hidden from public view using a +reverse proxy. This means you should typically query the Admin API from a terminal on +the machine which runs Synapse. + Once you have your `access_token`, you will need to authenticate each request to an Admin API endpoint by providing the token as either a query parameter or a request header. To add it as a request header in cURL: @@ -25,5 +30,17 @@ providing the token as either a query parameter or a request header. To add it a curl --header "Authorization: Bearer " ``` +For example, suppose we want to +[query the account](user_admin_api.md#query-user-account) of the user +`@foo:bar.com`. We need an admin access token (e.g. +`syt_AjfVef2_L33JNpafeif_0feKJfeaf0CQpoZk`), and we need to know which port +Synapse's [`client` listener](config_documentation.md#listeners) is listening +on (e.g. `8008`). Then we can use the following command to request the account +information from the Admin API. + +```sh +curl --header "Authorization: Bearer syt_AjfVef2_L33JNpafeif_0feKJfeaf0CQpoZk" -X GET http://127.0.0.1:8008/_synapse/admin/v2/users/@foo:bar.com +``` + For more details on access tokens in Matrix, please refer to the complete [matrix spec documentation](https://matrix.org/docs/spec/client_server/r0.6.1#using-access-tokens). 
From ad5761b65cf6a3fe203d8ffc425ed45b3a0a02fa Mon Sep 17 00:00:00 2001 From: Shay Date: Wed, 13 Jul 2022 11:36:02 -0700 Subject: [PATCH 119/178] Add support for room version 10 (#13220) --- changelog.d/13220.feature | 1 + synapse/api/room_versions.py | 33 +++++++++++++++++++++++++++++ synapse/event_auth.py | 26 +++++++++++++++++++++++ tests/test_event_auth.py | 41 +++++++++++++++++++++++++++++++++++- 4 files changed, 100 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13220.feature diff --git a/changelog.d/13220.feature b/changelog.d/13220.feature new file mode 100644 index 0000000000..9b0240fdc8 --- /dev/null +++ b/changelog.d/13220.feature @@ -0,0 +1 @@ +Add support for room version 10. diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py index 3f85d61b46..00e81b3afc 100644 --- a/synapse/api/room_versions.py +++ b/synapse/api/room_versions.py @@ -84,6 +84,8 @@ class RoomVersion: # MSC3787: Adds support for a `knock_restricted` join rule, mixing concepts of # knocks and restricted join rules into the same join condition. msc3787_knock_restricted_join_rule: bool + # MSC3667: Enforce integer power levels + msc3667_int_only_power_levels: bool class RoomVersions: @@ -103,6 +105,7 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, + msc3667_int_only_power_levels=False, ) V2 = RoomVersion( "2", @@ -120,6 +123,7 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, + msc3667_int_only_power_levels=False, ) V3 = RoomVersion( "3", @@ -137,6 +141,7 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, + msc3667_int_only_power_levels=False, ) V4 = RoomVersion( "4", @@ -154,6 +159,7 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, + msc3667_int_only_power_levels=False, ) V5 = RoomVersion( "5", @@ -171,6 +177,7 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, + msc3667_int_only_power_levels=False, ) V6 = RoomVersion( "6", @@ -188,6 +195,7 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, + msc3667_int_only_power_levels=False, ) MSC2176 = RoomVersion( "org.matrix.msc2176", @@ -205,6 +213,7 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, + msc3667_int_only_power_levels=False, ) V7 = RoomVersion( "7", @@ -222,6 +231,7 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, + msc3667_int_only_power_levels=False, ) V8 = RoomVersion( "8", @@ -239,6 +249,7 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, + msc3667_int_only_power_levels=False, ) V9 = RoomVersion( "9", @@ -256,6 +267,7 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, + msc3667_int_only_power_levels=False, ) MSC2716v3 = RoomVersion( "org.matrix.msc2716v3", @@ -273,6 +285,7 @@ class RoomVersions: msc2716_historical=True, msc2716_redactions=True, msc3787_knock_restricted_join_rule=False, + msc3667_int_only_power_levels=False, ) MSC3787 = RoomVersion( "org.matrix.msc3787", @@ -290,6 +303,25 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, 
msc3787_knock_restricted_join_rule=True, + msc3667_int_only_power_levels=False, + ) + V10 = RoomVersion( + "10", + RoomDisposition.STABLE, + EventFormatVersions.V3, + StateResolutionVersions.V2, + enforce_key_validity=True, + special_case_aliases_auth=False, + strict_canonicaljson=True, + limit_notifications_power_levels=True, + msc2176_redaction_rules=False, + msc3083_join_rules=True, + msc3375_redaction_rules=True, + msc2403_knocking=True, + msc2716_historical=False, + msc2716_redactions=False, + msc3787_knock_restricted_join_rule=True, + msc3667_int_only_power_levels=True, ) @@ -308,6 +340,7 @@ KNOWN_ROOM_VERSIONS: Dict[str, RoomVersion] = { RoomVersions.V9, RoomVersions.MSC2716v3, RoomVersions.MSC3787, + RoomVersions.V10, ) } diff --git a/synapse/event_auth.py b/synapse/event_auth.py index 0fc2c4b27e..965cb265da 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -740,6 +740,32 @@ def _check_power_levels( except Exception: raise SynapseError(400, "Not a valid power level: %s" % (v,)) + # Reject events with stringy power levels if required by room version + if ( + event.type == EventTypes.PowerLevels + and room_version_obj.msc3667_int_only_power_levels + ): + for k, v in event.content.items(): + if k in { + "users_default", + "events_default", + "state_default", + "ban", + "redact", + "kick", + "invite", + }: + if not isinstance(v, int): + raise SynapseError(400, f"{v!r} must be an integer.") + if k in {"events", "notifications", "users"}: + if not isinstance(v, dict) or not all( + isinstance(v, int) for v in v.values() + ): + raise SynapseError( + 400, + f"{v!r} must be a dict wherein all the values are integers.", + ) + key = (event.type, event.state_key) current_state = auth_events.get(key) diff --git a/tests/test_event_auth.py b/tests/test_event_auth.py index 371cd201af..e42d7b9ba0 100644 --- a/tests/test_event_auth.py +++ b/tests/test_event_auth.py @@ -19,7 +19,7 @@ from parameterized import parameterized from synapse import event_auth from synapse.api.constants import EventContentFields -from synapse.api.errors import AuthError +from synapse.api.errors import AuthError, SynapseError from synapse.api.room_versions import EventFormatVersions, RoomVersion, RoomVersions from synapse.events import EventBase, make_event_from_dict from synapse.storage.databases.main.events_worker import EventRedactBehaviour @@ -689,6 +689,45 @@ class EventAuthTestCase(unittest.TestCase): auth_events.values(), ) + def test_room_v10_rejects_string_power_levels(self) -> None: + pl_event_content = {"users_default": "42"} + pl_event = make_event_from_dict( + { + "room_id": TEST_ROOM_ID, + **_maybe_get_event_id_dict_for_room_version(RoomVersions.V10), + "type": "m.room.power_levels", + "sender": "@test:test.com", + "state_key": "", + "content": pl_event_content, + "signatures": {"test.com": {"ed25519:0": "some9signature"}}, + }, + room_version=RoomVersions.V10, + ) + + pl_event2_content = {"events": {"m.room.name": "42", "m.room.power_levels": 42}} + pl_event2 = make_event_from_dict( + { + "room_id": TEST_ROOM_ID, + **_maybe_get_event_id_dict_for_room_version(RoomVersions.V10), + "type": "m.room.power_levels", + "sender": "@test:test.com", + "state_key": "", + "content": pl_event2_content, + "signatures": {"test.com": {"ed25519:0": "some9signature"}}, + }, + room_version=RoomVersions.V10, + ) + + with self.assertRaises(SynapseError): + event_auth._check_power_levels( + pl_event.room_version, pl_event, {("fake_type", "fake_key"): pl_event2} + ) + + with self.assertRaises(SynapseError): + 
event_auth._check_power_levels( + pl_event.room_version, pl_event2, {("fake_type", "fake_key"): pl_event} + ) + # helpers for making events TEST_DOMAIN = "example.com" From cc1071598ad45e682a14dc8b3c1fe553e8158593 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacek=20Ku=C5=9Bnierz?= Date: Wed, 13 Jul 2022 20:43:17 +0200 Subject: [PATCH 120/178] Call the v2 identity service `/3pid/unbind` endpoint, rather than v1. (#13240) * Drop support for v1 unbind Signed-off-by: Jacek Kusnierz * Add changelog Signed-off-by: Jacek Kusnierz * Update changelog.d/13240.misc --- changelog.d/13240.misc | 1 + synapse/handlers/identity.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13240.misc diff --git a/changelog.d/13240.misc b/changelog.d/13240.misc new file mode 100644 index 0000000000..0567e47d64 --- /dev/null +++ b/changelog.d/13240.misc @@ -0,0 +1 @@ +Call the v2 identity service `/3pid/unbind` endpoint, rather than v1. \ No newline at end of file diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 164d891e90..9571d461c8 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -281,8 +281,8 @@ class IdentityHandler: "id_server must be a valid hostname with optional port and path components", ) - url = "https://%s/_matrix/identity/api/v1/3pid/unbind" % (id_server,) - url_bytes = b"/_matrix/identity/api/v1/3pid/unbind" + url = "https://%s/_matrix/identity/v2/3pid/unbind" % (id_server,) + url_bytes = b"/_matrix/identity/v2/3pid/unbind" content = { "mxid": mxid, From 0eb7e697682a8033564df6d602a7098d9a93d03e Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 13 Jul 2022 19:48:24 +0100 Subject: [PATCH 121/178] Notifier: accept callbacks to fire on room joins (#13254) --- changelog.d/13254.misc | 1 + synapse/notifier.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 changelog.d/13254.misc diff --git a/changelog.d/13254.misc b/changelog.d/13254.misc new file mode 100644 index 0000000000..cba6b9ee0f --- /dev/null +++ b/changelog.d/13254.misc @@ -0,0 +1 @@ +Preparatory work for a per-room rate limiter on joins. diff --git a/synapse/notifier.py b/synapse/notifier.py index 54b0ec4b97..c42bb8266a 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -228,6 +228,7 @@ class Notifier: # Called when there are new things to stream over replication self.replication_callbacks: List[Callable[[], None]] = [] + self._new_join_in_room_callbacks: List[Callable[[str, str], None]] = [] self._federation_client = hs.get_federation_http_client() @@ -280,6 +281,19 @@ class Notifier: """ self.replication_callbacks.append(cb) + def add_new_join_in_room_callback(self, cb: Callable[[str, str], None]) -> None: + """Add a callback that will be called when a user joins a room. + + This only fires on genuine membership changes, e.g. "invite" -> "join". + Membership transitions like "join" -> "join" (for e.g. displayname changes) do + not trigger the callback. + + When called, the callback receives two arguments: the event ID and the room ID. + It should *not* return a Deferred - if it needs to do any asynchronous work, a + background thread should be started and wrapped with run_as_background_process. 
+ """ + self._new_join_in_room_callbacks.append(cb) + async def on_new_room_event( self, event: EventBase, @@ -723,6 +737,10 @@ class Notifier: for cb in self.replication_callbacks: cb() + def notify_user_joined_room(self, event_id: str, room_id: str) -> None: + for cb in self._new_join_in_room_callbacks: + cb(event_id, room_id) + def notify_remote_server_up(self, server: str) -> None: """Notify any replication that a remote server has come back up""" # We call federation_sender directly rather than registering as a From 599c403d996ed5d66cacd63abfd2e7a87279b927 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 13 Jul 2022 20:09:42 +0100 Subject: [PATCH 122/178] Allow rate limiters to passively record actions they cannot limit (#13253) Co-authored-by: Patrick Cloke --- changelog.d/13253.misc | 1 + synapse/api/ratelimiting.py | 94 +++++++++++++++++++++++++++++----- tests/api/test_ratelimiting.py | 74 ++++++++++++++++++++++++++ 3 files changed, 157 insertions(+), 12 deletions(-) create mode 100644 changelog.d/13253.misc diff --git a/changelog.d/13253.misc b/changelog.d/13253.misc new file mode 100644 index 0000000000..cba6b9ee0f --- /dev/null +++ b/changelog.d/13253.misc @@ -0,0 +1 @@ +Preparatory work for a per-room rate limiter on joins. diff --git a/synapse/api/ratelimiting.py b/synapse/api/ratelimiting.py index 54d13026c9..f43965c1c8 100644 --- a/synapse/api/ratelimiting.py +++ b/synapse/api/ratelimiting.py @@ -27,6 +27,33 @@ class Ratelimiter: """ Ratelimit actions marked by arbitrary keys. + (Note that the source code speaks of "actions" and "burst_count" rather than + "tokens" and a "bucket_size".) + + This is a "leaky bucket as a meter". For each key to be tracked there is a bucket + containing some number 0 <= T <= `burst_count` of tokens corresponding to previously + permitted requests for that key. Each bucket starts empty, and gradually leaks + tokens at a rate of `rate_hz`. + + Upon an incoming request, we must determine: + - the key that this request falls under (which bucket to inspect), and + - the cost C of this request in tokens. + Then, if there is room in the bucket for C tokens (T + C <= `burst_count`), + the request is permitted and `cost` tokens are added to the bucket. + Otherwise the request is denied, and the bucket continues to hold T tokens. + + This means that the limiter enforces an average request frequency of `rate_hz`, + while accumulating a buffer of up to `burst_count` requests which can be consumed + instantaneously. + + The tricky bit is the leaking. We do not want to have a periodic process which + leaks every bucket! Instead, we track + - the time point when the bucket was last completely empty, and + - how many tokens have added to the bucket permitted since then. + Then for each incoming request, we can calculate how many tokens have leaked + since this time point, and use that to decide if we should accept or reject the + request. + Args: clock: A homeserver clock, for retrieving the current time rate_hz: The long term number of actions that can be performed in a second. @@ -41,14 +68,30 @@ class Ratelimiter: self.burst_count = burst_count self.store = store - # A ordered dictionary keeping track of actions, when they were last - # performed and how often. Each entry is a mapping from a key of arbitrary type - # to a tuple representing: - # * How many times an action has occurred since a point in time - # * The point in time - # * The rate_hz of this particular entry. 
This can vary per request + # An ordered dictionary representing the token buckets tracked by this rate + # limiter. Each entry maps a key of arbitrary type to a tuple representing: + # * The number of tokens currently in the bucket, + # * The time point when the bucket was last completely empty, and + # * The rate_hz (leak rate) of this particular bucket. self.actions: OrderedDict[Hashable, Tuple[float, float, float]] = OrderedDict() + def _get_key( + self, requester: Optional[Requester], key: Optional[Hashable] + ) -> Hashable: + """Use the requester's MXID as a fallback key if no key is provided.""" + if key is None: + if not requester: + raise ValueError("Must supply at least one of `requester` or `key`") + + key = requester.user.to_string() + return key + + def _get_action_counts( + self, key: Hashable, time_now_s: float + ) -> Tuple[float, float, float]: + """Retrieve the action counts, with a fallback representing an empty bucket.""" + return self.actions.get(key, (0.0, time_now_s, 0.0)) + async def can_do_action( self, requester: Optional[Requester], @@ -88,11 +131,7 @@ class Ratelimiter: * The reactor timestamp for when the action can be performed next. -1 if rate_hz is less than or equal to zero """ - if key is None: - if not requester: - raise ValueError("Must supply at least one of `requester` or `key`") - - key = requester.user.to_string() + key = self._get_key(requester, key) if requester: # Disable rate limiting of users belonging to any AS that is configured @@ -121,7 +160,7 @@ class Ratelimiter: self._prune_message_counts(time_now_s) # Check if there is an existing count entry for this key - action_count, time_start, _ = self.actions.get(key, (0.0, time_now_s, 0.0)) + action_count, time_start, _ = self._get_action_counts(key, time_now_s) # Check whether performing another action is allowed time_delta = time_now_s - time_start @@ -164,6 +203,37 @@ class Ratelimiter: return allowed, time_allowed + def record_action( + self, + requester: Optional[Requester], + key: Optional[Hashable] = None, + n_actions: int = 1, + _time_now_s: Optional[float] = None, + ) -> None: + """Record that an action(s) took place, even if they violate the rate limit. + + This is useful for tracking the frequency of events that happen across + federation which we still want to impose local rate limits on. For instance, if + we are alice.com monitoring a particular room, we cannot prevent bob.com + from joining users to that room. However, we can track the number of recent + joins in the room and refuse to serve new joins ourselves if there have been too + many in the room across both homeservers. + + Args: + requester: The requester that is doing the action, if any. + key: An arbitrary key used to classify an action. Defaults to the + requester's user ID. + n_actions: The number of times the user wants to do this action. If the user + cannot do all of the actions, the user's action count is not incremented + at all. + _time_now_s: The current time. Optional, defaults to the current time according + to self.clock. Only used by tests. 
+ """ + key = self._get_key(requester, key) + time_now_s = _time_now_s if _time_now_s is not None else self.clock.time() + action_count, time_start, rate_hz = self._get_action_counts(key, time_now_s) + self.actions[key] = (action_count + n_actions, time_start, rate_hz) + def _prune_message_counts(self, time_now_s: float) -> None: """Remove message count entries that have not exceeded their defined rate_hz limit diff --git a/tests/api/test_ratelimiting.py b/tests/api/test_ratelimiting.py index 18649c2c05..c86f783c5b 100644 --- a/tests/api/test_ratelimiting.py +++ b/tests/api/test_ratelimiting.py @@ -314,3 +314,77 @@ class TestRatelimiter(unittest.HomeserverTestCase): # Check that we get rate limited after using that token. self.assertFalse(consume_at(11.1)) + + def test_record_action_which_doesnt_fill_bucket(self) -> None: + limiter = Ratelimiter( + store=self.hs.get_datastores().main, clock=None, rate_hz=0.1, burst_count=3 + ) + + # Observe two actions, leaving room in the bucket for one more. + limiter.record_action(requester=None, key="a", n_actions=2, _time_now_s=0.0) + + # We should be able to take a new action now. + success, _ = self.get_success_or_raise( + limiter.can_do_action(requester=None, key="a", _time_now_s=0.0) + ) + self.assertTrue(success) + + # ... but not two. + success, _ = self.get_success_or_raise( + limiter.can_do_action(requester=None, key="a", _time_now_s=0.0) + ) + self.assertFalse(success) + + def test_record_action_which_fills_bucket(self) -> None: + limiter = Ratelimiter( + store=self.hs.get_datastores().main, clock=None, rate_hz=0.1, burst_count=3 + ) + + # Observe three actions, filling up the bucket. + limiter.record_action(requester=None, key="a", n_actions=3, _time_now_s=0.0) + + # We should be unable to take a new action now. + success, _ = self.get_success_or_raise( + limiter.can_do_action(requester=None, key="a", _time_now_s=0.0) + ) + self.assertFalse(success) + + # If we wait 10 seconds to leak a token, we should be able to take one action... + success, _ = self.get_success_or_raise( + limiter.can_do_action(requester=None, key="a", _time_now_s=10.0) + ) + self.assertTrue(success) + + # ... but not two. + success, _ = self.get_success_or_raise( + limiter.can_do_action(requester=None, key="a", _time_now_s=10.0) + ) + self.assertFalse(success) + + def test_record_action_which_overfills_bucket(self) -> None: + limiter = Ratelimiter( + store=self.hs.get_datastores().main, clock=None, rate_hz=0.1, burst_count=3 + ) + + # Observe four actions, exceeding the bucket. + limiter.record_action(requester=None, key="a", n_actions=4, _time_now_s=0.0) + + # We should be prevented from taking a new action now. + success, _ = self.get_success_or_raise( + limiter.can_do_action(requester=None, key="a", _time_now_s=0.0) + ) + self.assertFalse(success) + + # If we wait 10 seconds to leak a token, we should be unable to take an action + # because the bucket is still full. + success, _ = self.get_success_or_raise( + limiter.can_do_action(requester=None, key="a", _time_now_s=10.0) + ) + self.assertFalse(success) + + # But after another 10 seconds we leak a second token, giving us room for + # action. 
+ success, _ = self.get_success_or_raise( + limiter.can_do_action(requester=None, key="a", _time_now_s=20.0) + ) + self.assertTrue(success) From 0ca4172b5df6ea5f3e30ca6b83e51d230213971c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 14 Jul 2022 14:57:02 +0100 Subject: [PATCH 123/178] Don't pull out state in `compute_event_context` for unconflicted state (#13267) --- changelog.d/13267.misc | 1 + synapse/handlers/message.py | 7 +- synapse/state/__init__.py | 117 ++++++++++-------- synapse/storage/controllers/__init__.py | 4 +- synapse/storage/controllers/persist_events.py | 12 +- synapse/storage/databases/main/roommember.py | 35 ++---- tests/storage/test_roommember.py | 55 -------- 7 files changed, 95 insertions(+), 136 deletions(-) create mode 100644 changelog.d/13267.misc diff --git a/changelog.d/13267.misc b/changelog.d/13267.misc new file mode 100644 index 0000000000..a334414320 --- /dev/null +++ b/changelog.d/13267.misc @@ -0,0 +1 @@ +Don't pull out state in `compute_event_context` for unconflicted state. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 1980e37dae..b5fede9496 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1444,7 +1444,12 @@ class EventCreationHandler: if state_entry.state_group in self._external_cache_joined_hosts_updates: return - joined_hosts = await self.store.get_joined_hosts(event.room_id, state_entry) + state = await state_entry.get_state( + self._storage_controllers.state, StateFilter.all() + ) + joined_hosts = await self.store.get_joined_hosts( + event.room_id, state, state_entry + ) # Note that the expiry times must be larger than the expiry time in # _external_cache_joined_hosts_updates. diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 781d9f06da..9f0a36652c 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -31,7 +31,6 @@ from typing import ( Sequence, Set, Tuple, - Union, ) import attr @@ -47,6 +46,7 @@ from synapse.replication.http.state import ReplicationUpdateCurrentStateRestServ from synapse.state import v1, v2 from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.roommember import ProfileInfo +from synapse.storage.state import StateFilter from synapse.types import StateMap from synapse.util.async_helpers import Linearizer from synapse.util.caches.expiringcache import ExpiringCache @@ -54,6 +54,7 @@ from synapse.util.metrics import Measure, measure_func if TYPE_CHECKING: from synapse.server import HomeServer + from synapse.storage.controllers import StateStorageController from synapse.storage.databases.main import DataStore logger = logging.getLogger(__name__) @@ -83,17 +84,20 @@ def _gen_state_id() -> str: class _StateCacheEntry: - __slots__ = ["state", "state_group", "state_id", "prev_group", "delta_ids"] + __slots__ = ["state", "state_group", "prev_group", "delta_ids"] def __init__( self, - state: StateMap[str], + state: Optional[StateMap[str]], state_group: Optional[int], prev_group: Optional[int] = None, delta_ids: Optional[StateMap[str]] = None, ): + if state is None and state_group is None: + raise Exception("Either state or state group must be not None") + # A map from (type, state_key) to event_id. - self.state = frozendict(state) + self.state = frozendict(state) if state is not None else None # the ID of a state group if one and only one is involved. # otherwise, None otherwise? 
@@ -102,20 +106,30 @@ class _StateCacheEntry: self.prev_group = prev_group self.delta_ids = frozendict(delta_ids) if delta_ids is not None else None - # The `state_id` is a unique ID we generate that can be used as ID for - # this collection of state. Usually this would be the same as the - # state group, but on worker instances we can't generate a new state - # group each time we resolve state, so we generate a separate one that - # isn't persisted and is used solely for caches. - # `state_id` is either a state_group (and so an int) or a string. This - # ensures we don't accidentally persist a state_id as a stateg_group - if state_group: - self.state_id: Union[str, int] = state_group - else: - self.state_id = _gen_state_id() + async def get_state( + self, + state_storage: "StateStorageController", + state_filter: Optional["StateFilter"] = None, + ) -> StateMap[str]: + """Get the state map for this entry, either from the in-memory state or + looking up the state group in the DB. + """ + + if self.state is not None: + return self.state + + assert self.state_group is not None + + return await state_storage.get_state_ids_for_group( + self.state_group, state_filter + ) def __len__(self) -> int: - return len(self.state) + # The len should is used to estimate how large this cache entry is, for + # cache eviction purposes. This is why if `self.state` is None it's fine + # to return 1. + + return len(self.state) if self.state else 1 class StateHandler: @@ -153,7 +167,7 @@ class StateHandler: """ logger.debug("calling resolve_state_groups from get_current_state_ids") ret = await self.resolve_state_groups_for_events(room_id, latest_event_ids) - return ret.state + return await ret.get_state(self._state_storage_controller, StateFilter.all()) async def get_current_users_in_room( self, room_id: str, latest_event_ids: List[str] @@ -177,7 +191,8 @@ class StateHandler: logger.debug("calling resolve_state_groups from get_current_users_in_room") entry = await self.resolve_state_groups_for_events(room_id, latest_event_ids) - return await self.store.get_joined_users_from_state(room_id, entry) + state = await entry.get_state(self._state_storage_controller, StateFilter.all()) + return await self.store.get_joined_users_from_state(room_id, state, entry) async def get_hosts_in_room_at_events( self, room_id: str, event_ids: Collection[str] @@ -192,7 +207,8 @@ class StateHandler: The hosts in the room at the given events """ entry = await self.resolve_state_groups_for_events(room_id, event_ids) - return await self.store.get_joined_hosts(room_id, entry) + state = await entry.get_state(self._state_storage_controller, StateFilter.all()) + return await self.store.get_joined_hosts(room_id, state, entry) async def compute_event_context( self, @@ -227,10 +243,19 @@ class StateHandler: # if state_ids_before_event: # if we're given the state before the event, then we use that - state_group_before_event = None state_group_before_event_prev_group = None deltas_to_state_group_before_event = None - entry = None + + # .. though we need to get a state group for it. + state_group_before_event = ( + await self._state_storage_controller.store_state_group( + event.event_id, + event.room_id, + prev_group=None, + delta_ids=None, + current_state_ids=state_ids_before_event, + ) + ) else: # otherwise, we'll need to resolve the state across the prev_events. 
@@ -264,36 +289,27 @@ class StateHandler: await_full_state=False, ) - state_ids_before_event = entry.state - state_group_before_event = entry.state_group state_group_before_event_prev_group = entry.prev_group deltas_to_state_group_before_event = entry.delta_ids - # - # make sure that we have a state group at that point. If it's not a state event, - # that will be the state group for the new event. If it *is* a state event, - # it might get rejected (in which case we'll need to persist it with the - # previous state group) - # - - if not state_group_before_event: - state_group_before_event = ( - await self._state_storage_controller.store_state_group( - event.event_id, - event.room_id, - prev_group=state_group_before_event_prev_group, - delta_ids=deltas_to_state_group_before_event, - current_state_ids=state_ids_before_event, + # We make sure that we have a state group assigned to the state. + if entry.state_group is None: + state_ids_before_event = await entry.get_state( + self._state_storage_controller, StateFilter.all() + ) + state_group_before_event = ( + await self._state_storage_controller.store_state_group( + event.event_id, + event.room_id, + prev_group=state_group_before_event_prev_group, + delta_ids=deltas_to_state_group_before_event, + current_state_ids=state_ids_before_event, + ) ) - ) - - # Assign the new state group to the cached state entry. - # - # Note that this can race in that we could generate multiple state - # groups for the same state entry, but that is just inefficient - # rather than dangerous. - if entry and entry.state_group is None: entry.state_group = state_group_before_event + else: + state_group_before_event = entry.state_group + state_ids_before_event = None # # now if it's not a state event, we're done @@ -313,6 +329,10 @@ class StateHandler: # # otherwise, we'll need to create a new state group for after the event # + if state_ids_before_event is None: + state_ids_before_event = await entry.get_state( + self._state_storage_controller, StateFilter.all() + ) key = (event.type, event.state_key) if key in state_ids_before_event: @@ -372,9 +392,6 @@ class StateHandler: state_group_ids_set = set(state_group_ids) if len(state_group_ids_set) == 1: (state_group_id,) = state_group_ids_set - state = await self._state_storage_controller.get_state_for_groups( - state_group_ids_set - ) ( prev_group, delta_ids, @@ -382,7 +399,7 @@ class StateHandler: state_group_id ) return _StateCacheEntry( - state=state[state_group_id], + state=None, state_group=state_group_id, prev_group=prev_group, delta_ids=delta_ids, diff --git a/synapse/storage/controllers/__init__.py b/synapse/storage/controllers/__init__.py index 55649719f6..45101cda7a 100644 --- a/synapse/storage/controllers/__init__.py +++ b/synapse/storage/controllers/__init__.py @@ -43,4 +43,6 @@ class StorageControllers: self.persistence = None if stores.persist_events: - self.persistence = EventsPersistenceStorageController(hs, stores) + self.persistence = EventsPersistenceStorageController( + hs, stores, self.state + ) diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index ea499ce0f8..af65e5913b 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -48,9 +48,11 @@ from synapse.events.snapshot import EventContext from synapse.logging import opentracing from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable from synapse.metrics.background_process_metrics import 
run_as_background_process +from synapse.storage.controllers.state import StateStorageController from synapse.storage.databases import Databases from synapse.storage.databases.main.events import DeltaState from synapse.storage.databases.main.events_worker import EventRedactBehaviour +from synapse.storage.state import StateFilter from synapse.types import ( PersistedEventPosition, RoomStreamToken, @@ -308,7 +310,12 @@ class EventsPersistenceStorageController: current state and forward extremity changes. """ - def __init__(self, hs: "HomeServer", stores: Databases): + def __init__( + self, + hs: "HomeServer", + stores: Databases, + state_controller: StateStorageController, + ): # We ultimately want to split out the state store from the main store, # so we use separate variables here even though they point to the same # store for now. @@ -325,6 +332,7 @@ class EventsPersistenceStorageController: self._process_event_persist_queue_task ) self._state_resolution_handler = hs.get_state_resolution_handler() + self._state_controller = state_controller async def _process_event_persist_queue_task( self, @@ -504,7 +512,7 @@ class EventsPersistenceStorageController: state_res_store=StateResolutionStore(self.main_store), ) - return res.state + return await res.get_state(self._state_controller, StateFilter.all()) async def _persist_event_batch( self, _room_id: str, task: _PersistEventsTask diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 0b5e4e4254..71a65d565a 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -31,7 +31,6 @@ import attr from synapse.api.constants import EventTypes, Membership from synapse.events import EventBase -from synapse.events.snapshot import EventContext from synapse.metrics import LaterGauge from synapse.metrics.background_process_metrics import ( run_as_background_process, @@ -780,26 +779,8 @@ class RoomMemberWorkerStore(EventsWorkerStore): return shared_room_ids or frozenset() - async def get_joined_users_from_context( - self, event: EventBase, context: EventContext - ) -> Dict[str, ProfileInfo]: - state_group: Union[object, int] = context.state_group - if not state_group: - # If state_group is None it means it has yet to be assigned a - # state group, i.e. we need to make sure that calls with a state_group - # of None don't hit previous cached calls with a None state_group. 
- # To do this we set the state_group to a new object as object() != object() - state_group = object() - - current_state_ids = await context.get_current_state_ids() - assert current_state_ids is not None - assert state_group is not None - return await self._get_joined_users_from_context( - event.room_id, state_group, current_state_ids, event=event, context=context - ) - async def get_joined_users_from_state( - self, room_id: str, state_entry: "_StateCacheEntry" + self, room_id: str, state: StateMap[str], state_entry: "_StateCacheEntry" ) -> Dict[str, ProfileInfo]: state_group: Union[object, int] = state_entry.state_group if not state_group: @@ -812,18 +793,17 @@ class RoomMemberWorkerStore(EventsWorkerStore): assert state_group is not None with Measure(self._clock, "get_joined_users_from_state"): return await self._get_joined_users_from_context( - room_id, state_group, state_entry.state, context=state_entry + room_id, state_group, state, context=state_entry ) - @cached(num_args=2, cache_context=True, iterable=True, max_entries=100000) + @cached(num_args=2, iterable=True, max_entries=100000) async def _get_joined_users_from_context( self, room_id: str, state_group: Union[object, int], current_state_ids: StateMap[str], - cache_context: _CacheContext, event: Optional[EventBase] = None, - context: Optional[Union[EventContext, "_StateCacheEntry"]] = None, + context: Optional["_StateCacheEntry"] = None, ) -> Dict[str, ProfileInfo]: # We don't use `state_group`, it's there so that we can cache based # on it. However, it's important that it's never None, since two current_states @@ -1017,7 +997,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): ) async def get_joined_hosts( - self, room_id: str, state_entry: "_StateCacheEntry" + self, room_id: str, state: StateMap[str], state_entry: "_StateCacheEntry" ) -> FrozenSet[str]: state_group: Union[object, int] = state_entry.state_group if not state_group: @@ -1030,7 +1010,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): assert state_group is not None with Measure(self._clock, "get_joined_hosts"): return await self._get_joined_hosts( - room_id, state_group, state_entry=state_entry + room_id, state_group, state, state_entry=state_entry ) @cached(num_args=2, max_entries=10000, iterable=True) @@ -1038,6 +1018,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): self, room_id: str, state_group: Union[object, int], + state: StateMap[str], state_entry: "_StateCacheEntry", ) -> FrozenSet[str]: # We don't use `state_group`, it's there so that we can cache based on @@ -1093,7 +1074,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): # The cache doesn't match the state group or prev state group, # so we calculate the result from first principles. joined_users = await self.get_joined_users_from_state( - room_id, state_entry + room_id, state, state_entry ) cache.hosts_to_joined_users = {} diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index 1218786d79..240b02cb9f 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -23,7 +23,6 @@ from synapse.util import Clock from tests import unittest from tests.server import TestHomeServer -from tests.test_utils import event_injection class RoomMemberStoreTestCase(unittest.HomeserverTestCase): @@ -110,60 +109,6 @@ class RoomMemberStoreTestCase(unittest.HomeserverTestCase): # It now knows about Charlie's server. 
self.assertEqual(self.store._known_servers_count, 2) - def test_get_joined_users_from_context(self) -> None: - room = self.helper.create_room_as(self.u_alice, tok=self.t_alice) - bob_event = self.get_success( - event_injection.inject_member_event( - self.hs, room, self.u_bob, Membership.JOIN - ) - ) - - # first, create a regular event - event, context = self.get_success( - event_injection.create_event( - self.hs, - room_id=room, - sender=self.u_alice, - prev_event_ids=[bob_event.event_id], - type="m.test.1", - content={}, - ) - ) - - users = self.get_success( - self.store.get_joined_users_from_context(event, context) - ) - self.assertEqual(users.keys(), {self.u_alice, self.u_bob}) - - # Regression test for #7376: create a state event whose key matches bob's - # user_id, but which is *not* a membership event, and persist that; then check - # that `get_joined_users_from_context` returns the correct users for the next event. - non_member_event = self.get_success( - event_injection.inject_event( - self.hs, - room_id=room, - sender=self.u_bob, - prev_event_ids=[bob_event.event_id], - type="m.test.2", - state_key=self.u_bob, - content={}, - ) - ) - event, context = self.get_success( - event_injection.create_event( - self.hs, - room_id=room, - sender=self.u_alice, - prev_event_ids=[non_member_event.event_id], - type="m.test.3", - content={}, - ) - ) - users = self.get_success( - self.store.get_joined_users_from_context(event, context) - ) - self.assertEqual(users.keys(), {self.u_alice, self.u_bob}) - def test__null_byte_in_display_name_properly_handled(self) -> None: room = self.helper.create_room_as(self.u_alice, tok=self.t_alice) From df55b377bef7b23fa7245cb2942e47b6266f50af Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 14 Jul 2022 15:07:52 +0100 Subject: [PATCH 124/178] CHANGES.md: fix link to upgrade notes --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index bcf9fae4a5..78345c75fc 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,7 +1,7 @@ Synapse vNext ============= -As of this release, Synapse no longer allows the tasks of verifying email address ownership, and password reset confirmation, to be delegated to an identity server. For more information, see the [upgrade notes](https://github.com/matrix-org/synapse/blob/release-v1.63/docs/upgrade.md#upgrading-to-v1630). +As of this release, Synapse no longer allows the tasks of verifying email address ownership, and password reset confirmation, to be delegated to an identity server. For more information, see the [upgrade notes](https://matrix-org.github.io/synapse/v1.64/upgrade.html#upgrading-to-v1640). Synapse 1.63.0rc1 (2022-07-12) ============================== From fe15a865a5a5bc8e89d770e43dae702aa2a809cb Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 14 Jul 2022 22:52:26 +0100 Subject: [PATCH 125/178] Rip out auth-event reconciliation code (#12943) There is a corner in `_check_event_auth` (long known as "the weird corner") where, if we get an event with auth_events which don't match those we were expecting, we attempt to resolve the diffence between our state and the remote's with a state resolution. This isn't specced, and there's general agreement we shouldn't be doing it. However, it turns out that the faster-joins code was relying on it, so we need to introduce something similar (but rather simpler) for that. 
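In partial-state rooms the replacement boils down to folding the event's claimed auth events into whatever local state we do have via a state resolution, and then running the state-dependent auth rules against the result, instead of trying to reconcile state with the remote server. The snippet below is only a condensed sketch of that flow, assuming stand-in helpers `resolve_state_maps` and `run_auth_rules` (not real Synapse functions); the actual implementation is the `_check_event_auth` change in the diff that follows.

```python
# Condensed sketch of the new behaviour; resolve_state_maps and
# run_auth_rules are stand-in callables, not real Synapse functions.
def sketch_check_event_auth(
    event,
    local_state_ids,
    claimed_auth_events,
    partial_state,
    resolve_state_maps,
    run_auth_rules,
):
    # Map each claimed auth event to its (type, state_key) pair.
    claimed_ids = {(e.type, e.state_key): e.event_id for e in claimed_auth_events}

    if partial_state:
        # We may be missing some state (e.g. a ban), so fold the claimed auth
        # events into the state we do know about via a state resolution.
        state_for_auth = resolve_state_maps([local_state_ids, claimed_ids])
    else:
        # Full state: auth against the state we calculated ourselves.
        state_for_auth = local_state_ids

    try:
        run_auth_rules(event, state_for_auth)
    except Exception:
        # Mark the event rejected rather than state-resolving against the
        # remote server, as the removed "weird corner" used to do.
        return "rejected"
    return "accepted"
```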
--- changelog.d/12943.misc | 1 + synapse/handlers/federation_event.py | 281 ++++++-------------- synapse/state/__init__.py | 26 -- tests/handlers/test_federation.py | 140 +--------- tests/rest/client/test_third_party_rules.py | 11 +- tests/test_federation.py | 8 +- 6 files changed, 90 insertions(+), 377 deletions(-) create mode 100644 changelog.d/12943.misc diff --git a/changelog.d/12943.misc b/changelog.d/12943.misc new file mode 100644 index 0000000000..f66bb3ec32 --- /dev/null +++ b/changelog.d/12943.misc @@ -0,0 +1 @@ +Remove code which incorrectly attempted to reconcile state with remote servers when processing incoming events. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index c74117c19a..b1dab57447 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import collections import itertools import logging from http import HTTPStatus @@ -347,7 +348,7 @@ class FederationEventHandler: event.internal_metadata.send_on_behalf_of = origin context = await self._state_handler.compute_event_context(event) - context = await self._check_event_auth(origin, event, context) + await self._check_event_auth(origin, event, context) if context.rejected: raise SynapseError( 403, f"{event.membership} event was rejected", Codes.FORBIDDEN @@ -485,7 +486,7 @@ class FederationEventHandler: partial_state=partial_state, ) - context = await self._check_event_auth(origin, event, context) + await self._check_event_auth(origin, event, context) if context.rejected: raise SynapseError(400, "Join event was rejected") @@ -1116,11 +1117,7 @@ class FederationEventHandler: state_ids_before_event=state_ids, ) try: - context = await self._check_event_auth( - origin, - event, - context, - ) + await self._check_event_auth(origin, event, context) except AuthError as e: # This happens only if we couldn't find the auth events. We'll already have # logged a warning, so now we just convert to a FederationError. @@ -1495,11 +1492,8 @@ class FederationEventHandler: ) async def _check_event_auth( - self, - origin: str, - event: EventBase, - context: EventContext, - ) -> EventContext: + self, origin: str, event: EventBase, context: EventContext + ) -> None: """ Checks whether an event should be rejected (for failing auth checks). @@ -1509,9 +1503,6 @@ class FederationEventHandler: context: The event context. - Returns: - The updated context object. - Raises: AuthError if we were unable to find copies of the event's auth events. (Most other failures just cause us to set `context.rejected`.) @@ -1526,7 +1517,7 @@ class FederationEventHandler: logger.warning("While validating received event %r: %s", event, e) # TODO: use a different rejected reason here? context.rejected = RejectedReason.AUTH_ERROR - return context + return # next, check that we have all of the event's auth events. # @@ -1538,6 +1529,9 @@ class FederationEventHandler: ) # ... and check that the event passes auth at those auth events. + # https://spec.matrix.org/v1.3/server-server-api/#checks-performed-on-receipt-of-a-pdu: + # 4. Passes authorization rules based on the event’s auth events, + # otherwise it is rejected. 
try: await check_state_independent_auth_rules(self._store, event) check_state_dependent_auth_rules(event, claimed_auth_events) @@ -1546,55 +1540,90 @@ class FederationEventHandler: "While checking auth of %r against auth_events: %s", event, e ) context.rejected = RejectedReason.AUTH_ERROR - return context + return - # now check auth against what we think the auth events *should* be. - event_types = event_auth.auth_types_for_event(event.room_version, event) - prev_state_ids = await context.get_prev_state_ids( - StateFilter.from_types(event_types) - ) + # now check the auth rules pass against the room state before the event + # https://spec.matrix.org/v1.3/server-server-api/#checks-performed-on-receipt-of-a-pdu: + # 5. Passes authorization rules based on the state before the event, + # otherwise it is rejected. + # + # ... however, if we only have partial state for the room, then there is a good + # chance that we'll be missing some of the state needed to auth the new event. + # So, we state-resolve the auth events that we are given against the state that + # we know about, which ensures things like bans are applied. (Note that we'll + # already have checked we have all the auth events, in + # _load_or_fetch_auth_events_for_event above) + if context.partial_state: + room_version = await self._store.get_room_version_id(event.room_id) - auth_events_ids = self._event_auth_handler.compute_auth_events( - event, prev_state_ids, for_verification=True - ) - auth_events_x = await self._store.get_events(auth_events_ids) - calculated_auth_event_map = { - (e.type, e.state_key): e for e in auth_events_x.values() - } + local_state_id_map = await context.get_prev_state_ids() + claimed_auth_events_id_map = { + (ev.type, ev.state_key): ev.event_id for ev in claimed_auth_events + } - try: - updated_auth_events = await self._update_auth_events_for_auth( - event, - calculated_auth_event_map=calculated_auth_event_map, + state_for_auth_id_map = ( + await self._state_resolution_handler.resolve_events_with_store( + event.room_id, + room_version, + [local_state_id_map, claimed_auth_events_id_map], + event_map=None, + state_res_store=StateResolutionStore(self._store), + ) ) - except Exception: - # We don't really mind if the above fails, so lets not fail - # processing if it does. However, it really shouldn't fail so - # let's still log as an exception since we'll still want to fix - # any bugs. - logger.exception( - "Failed to double check auth events for %s with remote. " - "Ignoring failure and continuing processing of event.", - event.event_id, - ) - updated_auth_events = None - - if updated_auth_events: - context = await self._update_context_for_auth_events( - event, context, updated_auth_events - ) - auth_events_for_auth = updated_auth_events else: - auth_events_for_auth = calculated_auth_event_map + event_types = event_auth.auth_types_for_event(event.room_version, event) + state_for_auth_id_map = await context.get_prev_state_ids( + StateFilter.from_types(event_types) + ) + + calculated_auth_event_ids = self._event_auth_handler.compute_auth_events( + event, state_for_auth_id_map, for_verification=True + ) + + # if those are the same, we're done here. + if collections.Counter(event.auth_event_ids()) == collections.Counter( + calculated_auth_event_ids + ): + return + + # otherwise, re-run the auth checks based on what we calculated. 
+ calculated_auth_events = await self._store.get_events_as_list( + calculated_auth_event_ids + ) + + # log the differences + + claimed_auth_event_map = {(e.type, e.state_key): e for e in claimed_auth_events} + calculated_auth_event_map = { + (e.type, e.state_key): e for e in calculated_auth_events + } + logger.info( + "event's auth_events are different to our calculated auth_events. " + "Claimed but not calculated: %s. Calculated but not claimed: %s", + [ + ev + for k, ev in claimed_auth_event_map.items() + if k not in calculated_auth_event_map + or calculated_auth_event_map[k].event_id != ev.event_id + ], + [ + ev + for k, ev in calculated_auth_event_map.items() + if k not in claimed_auth_event_map + or claimed_auth_event_map[k].event_id != ev.event_id + ], + ) try: - check_state_dependent_auth_rules(event, auth_events_for_auth.values()) + check_state_dependent_auth_rules(event, calculated_auth_events) except AuthError as e: - logger.warning("Failed auth resolution for %r because %s", event, e) + logger.warning( + "While checking auth of %r against room state before the event: %s", + event, + e, + ) context.rejected = RejectedReason.AUTH_ERROR - return context - async def _maybe_kick_guest_users(self, event: EventBase) -> None: if event.type != EventTypes.GuestAccess: return @@ -1704,93 +1733,6 @@ class FederationEventHandler: soft_failed_event_counter.inc() event.internal_metadata.soft_failed = True - async def _update_auth_events_for_auth( - self, - event: EventBase, - calculated_auth_event_map: StateMap[EventBase], - ) -> Optional[StateMap[EventBase]]: - """Helper for _check_event_auth. See there for docs. - - Checks whether a given event has the expected auth events. If it - doesn't then we talk to the remote server to compare state to see if - we can come to a consensus (e.g. if one server missed some valid - state). - - This attempts to resolve any potential divergence of state between - servers, but is not essential and so failures should not block further - processing of the event. - - Args: - event: - - calculated_auth_event_map: - Our calculated auth_events based on the state of the room - at the event's position in the DAG. - - Returns: - updated auth event map, or None if no changes are needed. - - """ - assert not event.internal_metadata.outlier - - # check for events which are in the event's claimed auth_events, but not - # in our calculated event map. - event_auth_events = set(event.auth_event_ids()) - different_auth = event_auth_events.difference( - e.event_id for e in calculated_auth_event_map.values() - ) - - if not different_auth: - return None - - logger.info( - "auth_events refers to events which are not in our calculated auth " - "chain: %s", - different_auth, - ) - - # XXX: currently this checks for redactions but I'm not convinced that is - # necessary? - different_events = await self._store.get_events_as_list(different_auth) - - # double-check they're all in the same room - we should already have checked - # this but it doesn't hurt to check again. - for d in different_events: - assert ( - d.room_id == event.room_id - ), f"Event {event.event_id} refers to auth_event {d.event_id} which is in a different room" - - # now we state-resolve between our own idea of the auth events, and the remote's - # idea of them. 
- - local_state = calculated_auth_event_map.values() - remote_auth_events = dict(calculated_auth_event_map) - remote_auth_events.update({(d.type, d.state_key): d for d in different_events}) - remote_state = remote_auth_events.values() - - room_version = await self._store.get_room_version_id(event.room_id) - new_state = await self._state_handler.resolve_events( - room_version, (local_state, remote_state), event - ) - different_state = { - (d.type, d.state_key): d - for d in new_state.values() - if calculated_auth_event_map.get((d.type, d.state_key)) != d - } - if not different_state: - logger.info("State res returned no new state") - return None - - logger.info( - "After state res: updating auth_events with new state %s", - different_state.values(), - ) - - # take a copy of calculated_auth_event_map before we modify it. - auth_events = dict(calculated_auth_event_map) - auth_events.update(different_state) - return auth_events - async def _load_or_fetch_auth_events_for_event( self, destination: str, event: EventBase ) -> Collection[EventBase]: @@ -1888,61 +1830,6 @@ class FederationEventHandler: await self._auth_and_persist_outliers(room_id, remote_auth_events) - async def _update_context_for_auth_events( - self, event: EventBase, context: EventContext, auth_events: StateMap[EventBase] - ) -> EventContext: - """Update the state_ids in an event context after auth event resolution, - storing the changes as a new state group. - - Args: - event: The event we're handling the context for - - context: initial event context - - auth_events: Events to update in the event context. - - Returns: - new event context - """ - # exclude the state key of the new event from the current_state in the context. - if event.is_state(): - event_key: Optional[Tuple[str, str]] = (event.type, event.state_key) - else: - event_key = None - state_updates = { - k: a.event_id for k, a in auth_events.items() if k != event_key - } - - current_state_ids = await context.get_current_state_ids() - current_state_ids = dict(current_state_ids) # type: ignore - - current_state_ids.update(state_updates) - - prev_state_ids = await context.get_prev_state_ids() - prev_state_ids = dict(prev_state_ids) - - prev_state_ids.update({k: a.event_id for k, a in auth_events.items()}) - - # create a new state group as a delta from the existing one. 
- prev_group = context.state_group - state_group = await self._state_storage_controller.store_state_group( - event.event_id, - event.room_id, - prev_group=prev_group, - delta_ids=state_updates, - current_state_ids=current_state_ids, - ) - - return EventContext.with_state( - storage=self._storage_controllers, - state_group=state_group, - state_group_before_event=context.state_group_before_event, - state_delta_due_to_event=state_updates, - prev_group=prev_group, - delta_ids=state_updates, - partial_state=context.partial_state, - ) - async def _run_push_actions_and_persist_event( self, event: EventBase, context: EventContext, backfilled: bool = False ) -> None: diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 9f0a36652c..dcd272034d 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -24,7 +24,6 @@ from typing import ( DefaultDict, Dict, FrozenSet, - Iterable, List, Mapping, Optional, @@ -422,31 +421,6 @@ class StateHandler: ) return result - async def resolve_events( - self, - room_version: str, - state_sets: Collection[Iterable[EventBase]], - event: EventBase, - ) -> StateMap[EventBase]: - logger.info( - "Resolving state for %s with %d groups", event.room_id, len(state_sets) - ) - state_set_ids = [ - {(ev.type, ev.state_key): ev.event_id for ev in st} for st in state_sets - ] - - state_map = {ev.event_id: ev for st in state_sets for ev in st} - - new_state = await self._state_resolution_handler.resolve_events_with_store( - event.room_id, - room_version, - state_set_ids, - event_map=state_map, - state_res_store=StateResolutionStore(self.store), - ) - - return {key: state_map[ev_id] for key, ev_id in new_state.items()} - async def update_current_state(self, room_id: str) -> None: """Recalculates the current state for a room, and persists it. diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index 712933f9ca..8a0bb91f40 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import List, cast +from typing import cast from unittest import TestCase from twisted.test.proto_helpers import MemoryReactor @@ -50,8 +50,6 @@ class FederationTestCase(unittest.FederatingHomeserverTestCase): hs = self.setup_test_homeserver(federation_http_client=None) self.handler = hs.get_federation_handler() self.store = hs.get_datastores().main - self.state_storage_controller = hs.get_storage_controllers().state - self._event_auth_handler = hs.get_event_auth_handler() return hs def test_exchange_revoked_invite(self) -> None: @@ -314,142 +312,6 @@ class FederationTestCase(unittest.FederatingHomeserverTestCase): ) self.get_success(d) - def test_backfill_floating_outlier_membership_auth(self) -> None: - """ - As the local homeserver, check that we can properly process a federated - event from the OTHER_SERVER with auth_events that include a floating - membership event from the OTHER_SERVER. - - Regression test, see #10439. 
- """ - OTHER_SERVER = "otherserver" - OTHER_USER = "@otheruser:" + OTHER_SERVER - - # create the room - user_id = self.register_user("kermit", "test") - tok = self.login("kermit", "test") - room_id = self.helper.create_room_as( - room_creator=user_id, - is_public=True, - tok=tok, - extra_content={ - "preset": "public_chat", - }, - ) - room_version = self.get_success(self.store.get_room_version(room_id)) - - prev_event_ids = self.get_success(self.store.get_prev_events_for_room(room_id)) - ( - most_recent_prev_event_id, - most_recent_prev_event_depth, - ) = self.get_success(self.store.get_max_depth_of(prev_event_ids)) - # mapping from (type, state_key) -> state_event_id - assert most_recent_prev_event_id is not None - prev_state_map = self.get_success( - self.state_storage_controller.get_state_ids_for_event( - most_recent_prev_event_id - ) - ) - # List of state event ID's - prev_state_ids = list(prev_state_map.values()) - auth_event_ids = prev_state_ids - auth_events = list( - self.get_success(self.store.get_events(auth_event_ids)).values() - ) - - # build a floating outlier member state event - fake_prev_event_id = "$" + random_string(43) - member_event_dict = { - "type": EventTypes.Member, - "content": { - "membership": "join", - }, - "state_key": OTHER_USER, - "room_id": room_id, - "sender": OTHER_USER, - "depth": most_recent_prev_event_depth, - "prev_events": [fake_prev_event_id], - "origin_server_ts": self.clock.time_msec(), - "signatures": {OTHER_SERVER: {"ed25519:key_version": "SomeSignatureHere"}}, - } - builder = self.hs.get_event_builder_factory().for_room_version( - room_version, member_event_dict - ) - member_event = self.get_success( - builder.build( - prev_event_ids=member_event_dict["prev_events"], - auth_event_ids=self._event_auth_handler.compute_auth_events( - builder, - prev_state_map, - for_verification=False, - ), - depth=member_event_dict["depth"], - ) - ) - # Override the signature added from "test" homeserver that we created the event with - member_event.signatures = member_event_dict["signatures"] - - # Add the new member_event to the StateMap - updated_state_map = dict(prev_state_map) - updated_state_map[ - (member_event.type, member_event.state_key) - ] = member_event.event_id - auth_events.append(member_event) - - # build and send an event authed based on the member event - message_event_dict = { - "type": EventTypes.Message, - "content": {}, - "room_id": room_id, - "sender": OTHER_USER, - "depth": most_recent_prev_event_depth, - "prev_events": prev_event_ids.copy(), - "origin_server_ts": self.clock.time_msec(), - "signatures": {OTHER_SERVER: {"ed25519:key_version": "SomeSignatureHere"}}, - } - builder = self.hs.get_event_builder_factory().for_room_version( - room_version, message_event_dict - ) - message_event = self.get_success( - builder.build( - prev_event_ids=message_event_dict["prev_events"], - auth_event_ids=self._event_auth_handler.compute_auth_events( - builder, - updated_state_map, - for_verification=False, - ), - depth=message_event_dict["depth"], - ) - ) - # Override the signature added from "test" homeserver that we created the event with - message_event.signatures = message_event_dict["signatures"] - - # Stub the /event_auth response from the OTHER_SERVER - async def get_event_auth( - destination: str, room_id: str, event_id: str - ) -> List[EventBase]: - return [ - event_from_pdu_json(ae.get_pdu_json(), room_version=room_version) - for ae in auth_events - ] - - self.handler.federation_client.get_event_auth = get_event_auth # type: ignore[assignment] 
- - with LoggingContext("receive_pdu"): - # Fake the OTHER_SERVER federating the message event over to our local homeserver - d = run_in_background( - self.hs.get_federation_event_handler().on_receive_pdu, - OTHER_SERVER, - message_event, - ) - self.get_success(d) - - # Now try and get the events on our local homeserver - stored_event = self.get_success( - self.store.get_event(message_event.event_id, allow_none=True) - ) - self.assertTrue(stored_event is not None) - @unittest.override_config( {"rc_invites": {"per_user": {"per_second": 0.5, "burst_count": 3}}} ) diff --git a/tests/rest/client/test_third_party_rules.py b/tests/rest/client/test_third_party_rules.py index 5eb0f243f7..9a48e9286f 100644 --- a/tests/rest/client/test_third_party_rules.py +++ b/tests/rest/client/test_third_party_rules.py @@ -21,7 +21,6 @@ from synapse.api.constants import EventTypes, LoginType, Membership from synapse.api.errors import SynapseError from synapse.api.room_versions import RoomVersion from synapse.events import EventBase -from synapse.events.snapshot import EventContext from synapse.events.third_party_rules import load_legacy_third_party_event_rules from synapse.rest import admin from synapse.rest.client import account, login, profile, room @@ -113,14 +112,8 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase): # Have this homeserver skip event auth checks. This is necessary due to # event auth checks ensuring that events were signed by the sender's homeserver. - async def _check_event_auth( - origin: str, - event: EventBase, - context: EventContext, - *args: Any, - **kwargs: Any, - ) -> EventContext: - return context + async def _check_event_auth(origin: Any, event: Any, context: Any) -> None: + pass hs.get_federation_event_handler()._check_event_auth = _check_event_auth # type: ignore[assignment] diff --git a/tests/test_federation.py b/tests/test_federation.py index 0cbef70bfa..779fad1f63 100644 --- a/tests/test_federation.py +++ b/tests/test_federation.py @@ -81,12 +81,8 @@ class MessageAcceptTests(unittest.HomeserverTestCase): self.handler = self.homeserver.get_federation_handler() federation_event_handler = self.homeserver.get_federation_event_handler() - async def _check_event_auth( - origin, - event, - context, - ): - return context + async def _check_event_auth(origin, event, context): + pass federation_event_handler._check_event_auth = _check_event_auth self.client = self.homeserver.get_federation_client() From 21eeacc99551febcddcef21db96a2bd82166fc7e Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Fri, 15 Jul 2022 10:36:56 +0200 Subject: [PATCH 126/178] Federation Sender & Appservice Pusher Stream Optimisations (#13251) * Replace `get_new_events_for_appservice` with `get_all_new_events_stream` The functions were near identical and this brings the AS worker closer to the way federation senders work which can allow for multiple workers to handle AS traffic. * Pull received TS alongside events when processing the stream This avoids an extra query -per event- when both federation sender and appservice pusher process events. 
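After this change the appservice pusher's consumption loop has roughly the shape sketched below: a single batched query returns the events together with a map from event ID to `received_ts`, so computing the processing-lag metric no longer costs one `get_received_ts` query per event. This is only an illustrative outline of the flow in the diff that follows, assuming a `handle_event` callback and a `record_lag` metrics hook; scheduling, error handling and the federation-sender variant are omitted.

```python
# Illustrative outline of the post-change stream loop; handle_event and
# record_lag are assumed callbacks, not Synapse functions.
async def process_new_events(store, clock, current_max, handle_event, record_lag):
    upper_bound = -1
    while upper_bound < current_max:
        last_token = await store.get_appservice_last_pos()
        # One query returns both the events and their received timestamps.
        upper_bound, events, event_to_received_ts = (
            await store.get_all_new_events_stream(
                last_token, current_max, limit=100, get_prev_content=True
            )
        )
        for event in events:
            await handle_event(event)
            # Lag comes straight from the map, with no per-event DB lookup.
            record_lag(clock.time_msec() - event_to_received_ts[event.event_id])
        await store.set_appservice_last_pos(upper_bound)
```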
--- changelog.d/13251.misc | 1 + synapse/federation/sender/__init__.py | 10 ++- synapse/handlers/appservice.py | 11 ++-- synapse/storage/databases/main/appservice.py | 62 ++++++------------- .../storage/databases/main/events_worker.py | 19 ------ synapse/storage/databases/main/stream.py | 32 ++++++---- tests/handlers/test_appservice.py | 16 ++--- 7 files changed, 62 insertions(+), 89 deletions(-) create mode 100644 changelog.d/13251.misc diff --git a/changelog.d/13251.misc b/changelog.d/13251.misc new file mode 100644 index 0000000000..526369e403 --- /dev/null +++ b/changelog.d/13251.misc @@ -0,0 +1 @@ +Optimise federation sender and appservice pusher event stream processing queries. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index 99a794c042..94a65ac65f 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -351,7 +351,11 @@ class FederationSender(AbstractFederationSender): self._is_processing = True while True: last_token = await self.store.get_federation_out_pos("events") - next_token, events = await self.store.get_all_new_events_stream( + ( + next_token, + events, + event_to_received_ts, + ) = await self.store.get_all_new_events_stream( last_token, self._last_poked_id, limit=100 ) @@ -476,7 +480,7 @@ class FederationSender(AbstractFederationSender): await self._send_pdu(event, sharded_destinations) now = self.clock.time_msec() - ts = await self.store.get_received_ts(event.event_id) + ts = event_to_received_ts[event.event_id] assert ts is not None synapse.metrics.event_processing_lag_by_event.labels( "federation_sender" @@ -509,7 +513,7 @@ class FederationSender(AbstractFederationSender): if events: now = self.clock.time_msec() - ts = await self.store.get_received_ts(events[-1].event_id) + ts = event_to_received_ts[events[-1].event_id] assert ts is not None synapse.metrics.event_processing_lag.labels( diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index 814553e098..203b62e015 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -104,14 +104,15 @@ class ApplicationServicesHandler: with Measure(self.clock, "notify_interested_services"): self.is_processing = True try: - limit = 100 upper_bound = -1 while upper_bound < self.current_max: + last_token = await self.store.get_appservice_last_pos() ( upper_bound, events, - ) = await self.store.get_new_events_for_appservice( - self.current_max, limit + event_to_received_ts, + ) = await self.store.get_all_new_events_stream( + last_token, self.current_max, limit=100, get_prev_content=True ) events_by_room: Dict[str, List[EventBase]] = {} @@ -150,7 +151,7 @@ class ApplicationServicesHandler: ) now = self.clock.time_msec() - ts = await self.store.get_received_ts(event.event_id) + ts = event_to_received_ts[event.event_id] assert ts is not None synapse.metrics.event_processing_lag_by_event.labels( @@ -187,7 +188,7 @@ class ApplicationServicesHandler: if events: now = self.clock.time_msec() - ts = await self.store.get_received_ts(events[-1].event_id) + ts = event_to_received_ts[events[-1].event_id] assert ts is not None synapse.metrics.event_processing_lag.labels( diff --git a/synapse/storage/databases/main/appservice.py b/synapse/storage/databases/main/appservice.py index e284454b66..64b70a7b28 100644 --- a/synapse/storage/databases/main/appservice.py +++ b/synapse/storage/databases/main/appservice.py @@ -371,52 +371,30 @@ class 
ApplicationServiceTransactionWorkerStore( device_list_summary=DeviceListUpdates(), ) + async def get_appservice_last_pos(self) -> int: + """ + Get the last stream ordering position for the appservice process. + """ + + return await self.db_pool.simple_select_one_onecol( + table="appservice_stream_position", + retcol="stream_ordering", + keyvalues={}, + desc="get_appservice_last_pos", + ) + async def set_appservice_last_pos(self, pos: int) -> None: - def set_appservice_last_pos_txn(txn: LoggingTransaction) -> None: - txn.execute( - "UPDATE appservice_stream_position SET stream_ordering = ?", (pos,) - ) + """ + Set the last stream ordering position for the appservice process. + """ - await self.db_pool.runInteraction( - "set_appservice_last_pos", set_appservice_last_pos_txn + await self.db_pool.simple_update_one( + table="appservice_stream_position", + keyvalues={}, + updatevalues={"stream_ordering": pos}, + desc="set_appservice_last_pos", ) - async def get_new_events_for_appservice( - self, current_id: int, limit: int - ) -> Tuple[int, List[EventBase]]: - """Get all new events for an appservice""" - - def get_new_events_for_appservice_txn( - txn: LoggingTransaction, - ) -> Tuple[int, List[str]]: - sql = ( - "SELECT e.stream_ordering, e.event_id" - " FROM events AS e" - " WHERE" - " (SELECT stream_ordering FROM appservice_stream_position)" - " < e.stream_ordering" - " AND e.stream_ordering <= ?" - " ORDER BY e.stream_ordering ASC" - " LIMIT ?" - ) - - txn.execute(sql, (current_id, limit)) - rows = txn.fetchall() - - upper_bound = current_id - if len(rows) == limit: - upper_bound = rows[-1][0] - - return upper_bound, [row[1] for row in rows] - - upper_bound, event_ids = await self.db_pool.runInteraction( - "get_new_events_for_appservice", get_new_events_for_appservice_txn - ) - - events = await self.get_events_as_list(event_ids, get_prev_content=True) - - return upper_bound, events - async def get_type_stream_id_for_appservice( self, service: ApplicationService, type: str ) -> int: diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index b99b107784..621f92e238 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -292,25 +292,6 @@ class EventsWorkerStore(SQLBaseStore): super().process_replication_rows(stream_name, instance_name, token, rows) - async def get_received_ts(self, event_id: str) -> Optional[int]: - """Get received_ts (when it was persisted) for the event. - - Raises an exception for unknown events. - - Args: - event_id: The event ID to query. - - Returns: - Timestamp in milliseconds, or None for events that were persisted - before received_ts was implemented. - """ - return await self.db_pool.simple_select_one_onecol( - table="events", - keyvalues={"event_id": event_id}, - retcol="received_ts", - desc="get_received_ts", - ) - async def have_censored_event(self, event_id: str) -> bool: """Check if an event has been censored, i.e. if the content of the event has been erased from the database due to a redaction. 
diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 3a1df7776c..2590b52f73 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -1022,8 +1022,8 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): } async def get_all_new_events_stream( - self, from_id: int, current_id: int, limit: int - ) -> Tuple[int, List[EventBase]]: + self, from_id: int, current_id: int, limit: int, get_prev_content: bool = False + ) -> Tuple[int, List[EventBase], Dict[str, Optional[int]]]: """Get all new events Returns all events with from_id < stream_ordering <= current_id. @@ -1032,19 +1032,21 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): from_id: the stream_ordering of the last event we processed current_id: the stream_ordering of the most recently processed event limit: the maximum number of events to return + get_prev_content: whether to fetch previous event content Returns: - A tuple of (next_id, events), where `next_id` is the next value to - pass as `from_id` (it will either be the stream_ordering of the - last returned event, or, if fewer than `limit` events were found, - the `current_id`). + A tuple of (next_id, events, event_to_received_ts), where `next_id` + is the next value to pass as `from_id` (it will either be the + stream_ordering of the last returned event, or, if fewer than `limit` + events were found, the `current_id`). The `event_to_received_ts` is + a dictionary mapping event ID to the event `received_ts`. """ def get_all_new_events_stream_txn( txn: LoggingTransaction, - ) -> Tuple[int, List[str]]: + ) -> Tuple[int, Dict[str, Optional[int]]]: sql = ( - "SELECT e.stream_ordering, e.event_id" + "SELECT e.stream_ordering, e.event_id, e.received_ts" " FROM events AS e" " WHERE" " ? < e.stream_ordering AND e.stream_ordering <= ?" 
@@ -1059,15 +1061,21 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): if len(rows) == limit: upper_bound = rows[-1][0] - return upper_bound, [row[1] for row in rows] + event_to_received_ts: Dict[str, Optional[int]] = { + row[1]: row[2] for row in rows + } + return upper_bound, event_to_received_ts - upper_bound, event_ids = await self.db_pool.runInteraction( + upper_bound, event_to_received_ts = await self.db_pool.runInteraction( "get_all_new_events_stream", get_all_new_events_stream_txn ) - events = await self.get_events_as_list(event_ids) + events = await self.get_events_as_list( + event_to_received_ts.keys(), + get_prev_content=get_prev_content, + ) - return upper_bound, events + return upper_bound, events, event_to_received_ts async def get_federation_out_pos(self, typ: str) -> int: if self._need_to_reset_federation_stream_positions: diff --git a/tests/handlers/test_appservice.py b/tests/handlers/test_appservice.py index d96d5aa138..b17af2725b 100644 --- a/tests/handlers/test_appservice.py +++ b/tests/handlers/test_appservice.py @@ -50,7 +50,7 @@ class AppServiceHandlerTestCase(unittest.TestCase): self.mock_scheduler = Mock() hs = Mock() hs.get_datastores.return_value = Mock(main=self.mock_store) - self.mock_store.get_received_ts.return_value = make_awaitable(0) + self.mock_store.get_appservice_last_pos.return_value = make_awaitable(None) self.mock_store.set_appservice_last_pos.return_value = make_awaitable(None) self.mock_store.set_appservice_stream_type_pos.return_value = make_awaitable( None @@ -76,9 +76,9 @@ class AppServiceHandlerTestCase(unittest.TestCase): event = Mock( sender="@someone:anywhere", type="m.room.message", room_id="!foo:bar" ) - self.mock_store.get_new_events_for_appservice.side_effect = [ - make_awaitable((0, [])), - make_awaitable((1, [event])), + self.mock_store.get_all_new_events_stream.side_effect = [ + make_awaitable((0, [], {})), + make_awaitable((1, [event], {event.event_id: 0})), ] self.handler.notify_interested_services(RoomStreamToken(None, 1)) @@ -95,8 +95,8 @@ class AppServiceHandlerTestCase(unittest.TestCase): event = Mock(sender=user_id, type="m.room.message", room_id="!foo:bar") self.mock_as_api.query_user.return_value = make_awaitable(True) - self.mock_store.get_new_events_for_appservice.side_effect = [ - make_awaitable((0, [event])), + self.mock_store.get_all_new_events_stream.side_effect = [ + make_awaitable((0, [event], {event.event_id: 0})), ] self.handler.notify_interested_services(RoomStreamToken(None, 0)) @@ -112,8 +112,8 @@ class AppServiceHandlerTestCase(unittest.TestCase): event = Mock(sender=user_id, type="m.room.message", room_id="!foo:bar") self.mock_as_api.query_user.return_value = make_awaitable(True) - self.mock_store.get_new_events_for_appservice.side_effect = [ - make_awaitable((0, [event])), + self.mock_store.get_all_new_events_stream.side_effect = [ + make_awaitable((0, [event], {event.event_id: 0})), ] self.handler.notify_interested_services(RoomStreamToken(None, 0)) From cc21a431f3bdb353427c3242e49b1941a51175b3 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Fri, 15 Jul 2022 11:30:46 +0200 Subject: [PATCH 127/178] Async get event cache prep (#13242) Some experimental prep work to enable external event caching based on #9379 & #12955. Doesn't actually move the cache at all, just lays the groundwork for async implemented caches. 
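For illustration only, here is a minimal sketch of the wrapper pattern this groundwork enables: a thin `async` facade over an ordinary in-memory mapping, so call sites can already `await` cache operations before any external backend (e.g. Redis) is wired in. The `SimpleAsyncCache` class and its dict-backed storage below are placeholders for illustration, not the classes introduced by this patch.

```python
from typing import Dict, Generic, Optional, TypeVar

KT = TypeVar("KT")
VT = TypeVar("VT")


class SimpleAsyncCache(Generic[KT, VT]):
    """Illustrative async facade over a synchronous in-memory cache."""

    def __init__(self) -> None:
        self._data: Dict[KT, VT] = {}

    async def get(self, key: KT, default: Optional[VT] = None) -> Optional[VT]:
        # A real implementation could consult an external cache here first,
        # falling back to the local map on a miss.
        return self._data.get(key, default)

    async def set(self, key: KT, value: VT) -> None:
        # ...and write through to the external cache as well.
        self._data[key] = value

    async def invalidate(self, key: KT) -> None:
        # Invalidate any external copy before dropping the local entry.
        self._data.pop(key, None)
```

Most of the churn in a change like this is then mechanical: callers switch from `cache.get(key)` to `await cache.get(key)`, and tests wrap calls such as `cache.clear()` in the test harness's success helper.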
Signed off by Nick @ Beeper (@Fizzadar) --- changelog.d/13242.misc | 1 + synapse/storage/database.py | 10 +++-- synapse/storage/databases/main/cache.py | 7 +++- synapse/storage/databases/main/events.py | 4 +- .../storage/databases/main/events_worker.py | 34 ++++++++++++----- .../storage/databases/main/purge_events.py | 2 +- synapse/storage/databases/main/roommember.py | 4 +- synapse/util/caches/lrucache.py | 38 +++++++++++++++++++ tests/handlers/test_sync.py | 2 +- .../databases/main/test_events_worker.py | 8 ++-- tests/storage/test_purge.py | 2 +- 11 files changed, 86 insertions(+), 26 deletions(-) create mode 100644 changelog.d/13242.misc diff --git a/changelog.d/13242.misc b/changelog.d/13242.misc new file mode 100644 index 0000000000..7f8ec0815f --- /dev/null +++ b/changelog.d/13242.misc @@ -0,0 +1 @@ +Use an asynchronous cache wrapper for the get event cache. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/storage/database.py b/synapse/storage/database.py index e21ab08515..6a6d0dcd73 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -57,7 +57,7 @@ from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.background_updates import BackgroundUpdater from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine from synapse.storage.types import Connection, Cursor -from synapse.util.async_helpers import delay_cancellation +from synapse.util.async_helpers import delay_cancellation, maybe_awaitable from synapse.util.iterutils import batch_iter if TYPE_CHECKING: @@ -818,12 +818,14 @@ class DatabasePool: ) for after_callback, after_args, after_kwargs in after_callbacks: - after_callback(*after_args, **after_kwargs) + await maybe_awaitable(after_callback(*after_args, **after_kwargs)) return cast(R, result) except Exception: - for after_callback, after_args, after_kwargs in exception_callbacks: - after_callback(*after_args, **after_kwargs) + for exception_callback, after_args, after_kwargs in exception_callbacks: + await maybe_awaitable( + exception_callback(*after_args, **after_kwargs) + ) raise # To handle cancellation, we ensure that `after_callback`s and diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index 1653a6a9b6..2367ddeea3 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -193,7 +193,10 @@ class CacheInvalidationWorkerStore(SQLBaseStore): relates_to: Optional[str], backfilled: bool, ) -> None: - self._invalidate_get_event_cache(event_id) + # This invalidates any local in-memory cached event objects, the original + # process triggering the invalidation is responsible for clearing any external + # cached objects. + self._invalidate_local_get_event_cache(event_id) self.have_seen_event.invalidate((room_id, event_id)) self.get_latest_event_ids_in_room.invalidate((room_id,)) @@ -208,7 +211,7 @@ class CacheInvalidationWorkerStore(SQLBaseStore): self._events_stream_cache.entity_has_changed(room_id, stream_ordering) if redacts: - self._invalidate_get_event_cache(redacts) + self._invalidate_local_get_event_cache(redacts) # Caches which might leak edits must be invalidated for the event being # redacted. 
self.get_relations_for_event.invalidate((redacts,)) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index eb4efbb93c..fa2266ba20 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1669,9 +1669,9 @@ class PersistEventsStore: if not row["rejects"] and not row["redacts"]: to_prefill.append(EventCacheEntry(event=event, redacted_event=None)) - def prefill() -> None: + async def prefill() -> None: for cache_entry in to_prefill: - self.store._get_event_cache.set( + await self.store._get_event_cache.set( (cache_entry.event.event_id,), cache_entry ) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 621f92e238..f3935bfead 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -79,7 +79,7 @@ from synapse.types import JsonDict, get_domain_from_id from synapse.util import unwrapFirstError from synapse.util.async_helpers import ObservableDeferred, delay_cancellation from synapse.util.caches.descriptors import cached, cachedList -from synapse.util.caches.lrucache import LruCache +from synapse.util.caches.lrucache import AsyncLruCache from synapse.util.iterutils import batch_iter from synapse.util.metrics import Measure @@ -238,7 +238,9 @@ class EventsWorkerStore(SQLBaseStore): 5 * 60 * 1000, ) - self._get_event_cache: LruCache[Tuple[str], EventCacheEntry] = LruCache( + self._get_event_cache: AsyncLruCache[ + Tuple[str], EventCacheEntry + ] = AsyncLruCache( cache_name="*getEvent*", max_size=hs.config.caches.event_cache_size, ) @@ -598,7 +600,7 @@ class EventsWorkerStore(SQLBaseStore): Returns: map from event id to result """ - event_entry_map = self._get_events_from_cache( + event_entry_map = await self._get_events_from_cache( event_ids, ) @@ -710,12 +712,22 @@ class EventsWorkerStore(SQLBaseStore): return event_entry_map - def _invalidate_get_event_cache(self, event_id: str) -> None: - self._get_event_cache.invalidate((event_id,)) + async def _invalidate_get_event_cache(self, event_id: str) -> None: + # First we invalidate the asynchronous cache instance. This may include + # out-of-process caches such as Redis/memcache. Once complete we can + # invalidate any in memory cache. The ordering is important here to + # ensure we don't pull in any remote invalid value after we invalidate + # the in-memory cache. + await self._get_event_cache.invalidate((event_id,)) self._event_ref.pop(event_id, None) self._current_event_fetches.pop(event_id, None) - def _get_events_from_cache( + def _invalidate_local_get_event_cache(self, event_id: str) -> None: + self._get_event_cache.invalidate_local((event_id,)) + self._event_ref.pop(event_id, None) + self._current_event_fetches.pop(event_id, None) + + async def _get_events_from_cache( self, events: Iterable[str], update_metrics: bool = True ) -> Dict[str, EventCacheEntry]: """Fetch events from the caches. @@ -730,7 +742,7 @@ class EventsWorkerStore(SQLBaseStore): for event_id in events: # First check if it's in the event cache - ret = self._get_event_cache.get( + ret = await self._get_event_cache.get( (event_id,), None, update_metrics=update_metrics ) if ret: @@ -752,7 +764,7 @@ class EventsWorkerStore(SQLBaseStore): # We add the entry back into the cache as we want to keep # recently queried events in the cache. 
- self._get_event_cache.set((event_id,), cache_entry) + await self._get_event_cache.set((event_id,), cache_entry) return event_map @@ -1129,7 +1141,7 @@ class EventsWorkerStore(SQLBaseStore): event=original_ev, redacted_event=redacted_event ) - self._get_event_cache.set((event_id,), cache_entry) + await self._get_event_cache.set((event_id,), cache_entry) result_map[event_id] = cache_entry if not redacted_event: @@ -1363,7 +1375,9 @@ class EventsWorkerStore(SQLBaseStore): # if the event cache contains the event, obviously we've seen it. cache_results = { - (rid, eid) for (rid, eid) in keys if self._get_event_cache.contains((eid,)) + (rid, eid) + for (rid, eid) in keys + if await self._get_event_cache.contains((eid,)) } results = dict.fromkeys(cache_results, True) remaining = [k for k in keys if k not in cache_results] diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py index 87b0d09039..549ce07c16 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py @@ -302,7 +302,7 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore): self._invalidate_cache_and_stream( txn, self.have_seen_event, (room_id, event_id) ) - self._invalidate_get_event_cache(event_id) + txn.call_after(self._invalidate_get_event_cache, event_id) logger.info("[purge] done") diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 71a65d565a..105a518677 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -843,7 +843,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): # We don't update the event cache hit ratio as it completely throws off # the hit ratio counts. After all, we don't populate the cache if we # miss it here - event_map = self._get_events_from_cache(member_event_ids, update_metrics=False) + event_map = await self._get_events_from_cache( + member_event_ids, update_metrics=False + ) missing_member_event_ids = [] for event_id in member_event_ids: diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index 8ed5325c5d..31f41fec82 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -730,3 +730,41 @@ class LruCache(Generic[KT, VT]): # This happens e.g. in the sync code where we have an expiring cache of # lru caches. self.clear() + + +class AsyncLruCache(Generic[KT, VT]): + """ + An asynchronous wrapper around a subset of the LruCache API. + + On its own this doesn't change the behaviour but allows subclasses that + utilize external cache systems that require await behaviour to be created. + """ + + def __init__(self, *args, **kwargs): # type: ignore + self._lru_cache: LruCache[KT, VT] = LruCache(*args, **kwargs) + + async def get( + self, key: KT, default: Optional[T] = None, update_metrics: bool = True + ) -> Optional[VT]: + return self._lru_cache.get(key, update_metrics=update_metrics) + + async def set(self, key: KT, value: VT) -> None: + self._lru_cache.set(key, value) + + async def invalidate(self, key: KT) -> None: + # This method should invalidate any external cache and then invalidate the LruCache. + return self._lru_cache.invalidate(key) + + def invalidate_local(self, key: KT) -> None: + """Remove an entry from the local cache + + This variant of `invalidate` is useful if we know that the external + cache has already been invalidated. 
+ """ + return self._lru_cache.invalidate(key) + + async def contains(self, key: KT) -> bool: + return self._lru_cache.contains(key) + + async def clear(self) -> None: + self._lru_cache.clear() diff --git a/tests/handlers/test_sync.py b/tests/handlers/test_sync.py index ecc7cc6461..e3f38fbcc5 100644 --- a/tests/handlers/test_sync.py +++ b/tests/handlers/test_sync.py @@ -159,7 +159,7 @@ class SyncTestCase(tests.unittest.HomeserverTestCase): # Blow away caches (supported room versions can only change due to a restart). self.store.get_rooms_for_user_with_stream_ordering.invalidate_all() - self.store._get_event_cache.clear() + self.get_success(self.store._get_event_cache.clear()) self.store._event_ref.clear() # The rooms should be excluded from the sync response. diff --git a/tests/storage/databases/main/test_events_worker.py b/tests/storage/databases/main/test_events_worker.py index 38963ce4a7..46d829b062 100644 --- a/tests/storage/databases/main/test_events_worker.py +++ b/tests/storage/databases/main/test_events_worker.py @@ -143,7 +143,7 @@ class EventCacheTestCase(unittest.HomeserverTestCase): self.event_id = res["event_id"] # Reset the event cache so the tests start with it empty - self.store._get_event_cache.clear() + self.get_success(self.store._get_event_cache.clear()) def test_simple(self): """Test that we cache events that we pull from the DB.""" @@ -160,7 +160,7 @@ class EventCacheTestCase(unittest.HomeserverTestCase): """ # Reset the event cache - self.store._get_event_cache.clear() + self.get_success(self.store._get_event_cache.clear()) with LoggingContext("test") as ctx: # We keep hold of the event event though we never use it. @@ -170,7 +170,7 @@ class EventCacheTestCase(unittest.HomeserverTestCase): self.assertEqual(ctx.get_resource_usage().evt_db_fetch_count, 1) # Reset the event cache - self.store._get_event_cache.clear() + self.get_success(self.store._get_event_cache.clear()) with LoggingContext("test") as ctx: self.get_success(self.store.get_event(self.event_id)) @@ -345,7 +345,7 @@ class GetEventCancellationTestCase(unittest.HomeserverTestCase): self.event_id = res["event_id"] # Reset the event cache so the tests start with it empty - self.store._get_event_cache.clear() + self.get_success(self.store._get_event_cache.clear()) @contextmanager def blocking_get_event_calls( diff --git a/tests/storage/test_purge.py b/tests/storage/test_purge.py index 8dfaa0559b..9c1182ed16 100644 --- a/tests/storage/test_purge.py +++ b/tests/storage/test_purge.py @@ -115,6 +115,6 @@ class PurgeTests(HomeserverTestCase): ) # The events aren't found. - self.store._invalidate_get_event_cache(create_event.event_id) + self.store._invalidate_local_get_event_cache(create_event.event_id) self.get_failure(self.store.get_event(create_event.event_id), NotFoundError) self.get_failure(self.store.get_event(first["event_id"]), NotFoundError) From 512486bbeb606ae989fe26d7b39074096a70b32c Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 15 Jul 2022 11:13:40 +0100 Subject: [PATCH 128/178] Docker: copy postgres from base image (#13279) When building the docker images for complement testing, copy a preinstalled complement over from a base image, rather than apt installing it. This avoids network traffic and is much faster. 
--- changelog.d/13279.misc | 1 + docker/complement/Dockerfile | 76 +++++++++++-------- .../complement/conf/postgres.supervisord.conf | 2 +- 3 files changed, 48 insertions(+), 31 deletions(-) create mode 100644 changelog.d/13279.misc diff --git a/changelog.d/13279.misc b/changelog.d/13279.misc new file mode 100644 index 0000000000..a083d2af2a --- /dev/null +++ b/changelog.d/13279.misc @@ -0,0 +1 @@ +Reduce the rebuild time for the complement-synapse docker image. diff --git a/docker/complement/Dockerfile b/docker/complement/Dockerfile index 8bec0f6116..c5e7984a28 100644 --- a/docker/complement/Dockerfile +++ b/docker/complement/Dockerfile @@ -4,42 +4,58 @@ # # Instructions for building this image from those it depends on is detailed in this guide: # https://github.com/matrix-org/synapse/blob/develop/docker/README-testing.md#testing-with-postgresql-and-single-or-multi-process-synapse + ARG SYNAPSE_VERSION=latest + +# first of all, we create a base image with a postgres server and database, +# which we can copy into the target image. For repeated rebuilds, this is +# much faster than apt installing postgres each time. +# +# This trick only works because (a) the Synapse image happens to have all the +# shared libraries that postgres wants, (b) we use a postgres image based on +# the same debian version as Synapse's docker image (so the versions of the +# shared libraries match). + +FROM postgres:13-bullseye AS postgres_base + # initialise the database cluster in /var/lib/postgresql + RUN gosu postgres initdb --locale=C --encoding=UTF-8 --auth-host password + + # Configure a password and create a database for Synapse + RUN echo "ALTER USER postgres PASSWORD 'somesecret'" | gosu postgres postgres --single + RUN echo "CREATE DATABASE synapse" | gosu postgres postgres --single + +# now build the final image, based on the Synapse image. + FROM matrixdotorg/synapse-workers:$SYNAPSE_VERSION + # copy the postgres installation over from the image we built above + RUN adduser --system --uid 999 postgres --home /var/lib/postgresql + COPY --from=postgres_base /var/lib/postgresql /var/lib/postgresql + COPY --from=postgres_base /usr/lib/postgresql /usr/lib/postgresql + COPY --from=postgres_base /usr/share/postgresql /usr/share/postgresql + RUN mkdir /var/run/postgresql && chown postgres /var/run/postgresql + ENV PATH="${PATH}:/usr/lib/postgresql/13/bin" + ENV PGDATA=/var/lib/postgresql/data -# Install postgresql -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -yqq postgresql-13 + # Extend the shared homeserver config to disable rate-limiting, + # set Complement's static shared secret, enable registration, amongst other + # tweaks to get Synapse ready for testing. + # To do this, we copy the old template out of the way and then include it + # with Jinja2. + RUN mv /conf/shared.yaml.j2 /conf/shared-orig.yaml.j2 + COPY conf/workers-shared-extra.yaml.j2 /conf/shared.yaml.j2 -# Configure a user and create a database for Synapse -RUN pg_ctlcluster 13 main start && su postgres -c "echo \ - \"ALTER USER postgres PASSWORD 'somesecret'; \ - CREATE DATABASE synapse \ - ENCODING 'UTF8' \ - LC_COLLATE='C' \ - LC_CTYPE='C' \ - template=template0;\" | psql" && pg_ctlcluster 13 main stop + WORKDIR /data -# Extend the shared homeserver config to disable rate-limiting, -# set Complement's static shared secret, enable registration, amongst other -# tweaks to get Synapse ready for testing. 
-# To do this, we copy the old template out of the way and then include it -# with Jinja2. -RUN mv /conf/shared.yaml.j2 /conf/shared-orig.yaml.j2 -COPY conf/workers-shared-extra.yaml.j2 /conf/shared.yaml.j2 + COPY conf/postgres.supervisord.conf /etc/supervisor/conf.d/postgres.conf -WORKDIR /data + # Copy the entrypoint + COPY conf/start_for_complement.sh / -COPY conf/postgres.supervisord.conf /etc/supervisor/conf.d/postgres.conf + # Expose nginx's listener ports + EXPOSE 8008 8448 -# Copy the entrypoint -COPY conf/start_for_complement.sh / + ENTRYPOINT ["/start_for_complement.sh"] -# Expose nginx's listener ports -EXPOSE 8008 8448 - -ENTRYPOINT ["/start_for_complement.sh"] - -# Update the healthcheck to have a shorter check interval -HEALTHCHECK --start-period=5s --interval=1s --timeout=1s \ - CMD /bin/sh /healthcheck.sh + # Update the healthcheck to have a shorter check interval + HEALTHCHECK --start-period=5s --interval=1s --timeout=1s \ + CMD /bin/sh /healthcheck.sh diff --git a/docker/complement/conf/postgres.supervisord.conf b/docker/complement/conf/postgres.supervisord.conf index 5dae3e6330..b88bfc772e 100644 --- a/docker/complement/conf/postgres.supervisord.conf +++ b/docker/complement/conf/postgres.supervisord.conf @@ -1,5 +1,5 @@ [program:postgres] -command=/usr/local/bin/prefix-log /usr/bin/pg_ctlcluster 13 main start --foreground +command=/usr/local/bin/prefix-log gosu postgres postgres # Only start if START_POSTGRES=1 autostart=%(ENV_START_POSTGRES)s From 7be954f59b4a8c98752e72c628c853d448b746ad Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 15 Jul 2022 12:06:41 +0100 Subject: [PATCH 129/178] Fix a bug which could lead to incorrect state (#13278) There are two fixes here: 1. A long-standing bug where we incorrectly calculated `delta_ids`; and 2. A bug introduced in #13267 where we got current state incorrect. --- changelog.d/13278.bugfix | 1 + synapse/state/__init__.py | 19 ++++++--- synapse/storage/controllers/persist_events.py | 3 +- tests/test_state.py | 42 ++++++++++++++++++- 4 files changed, 58 insertions(+), 7 deletions(-) create mode 100644 changelog.d/13278.bugfix diff --git a/changelog.d/13278.bugfix b/changelog.d/13278.bugfix new file mode 100644 index 0000000000..49e9377c79 --- /dev/null +++ b/changelog.d/13278.bugfix @@ -0,0 +1 @@ +Fix long-standing bug where in rare instances Synapse could store the incorrect state for a room after a state resolution. diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index dcd272034d..3a65bd0849 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -83,7 +83,7 @@ def _gen_state_id() -> str: class _StateCacheEntry: - __slots__ = ["state", "state_group", "prev_group", "delta_ids"] + __slots__ = ["_state", "state_group", "prev_group", "delta_ids"] def __init__( self, @@ -96,7 +96,10 @@ class _StateCacheEntry: raise Exception("Either state or state group must be not None") # A map from (type, state_key) to event_id. - self.state = frozendict(state) if state is not None else None + # + # This can be None if we have a `state_group` (as then we can fetch the + # state from the DB.) + self._state = frozendict(state) if state is not None else None # the ID of a state group if one and only one is involved. # otherwise, None otherwise? @@ -114,8 +117,8 @@ class _StateCacheEntry: looking up the state group in the DB. 
""" - if self.state is not None: - return self.state + if self._state is not None: + return self._state assert self.state_group is not None @@ -128,7 +131,7 @@ class _StateCacheEntry: # cache eviction purposes. This is why if `self.state` is None it's fine # to return 1. - return len(self.state) if self.state else 1 + return len(self._state) if self._state else 1 class StateHandler: @@ -743,6 +746,12 @@ def _make_state_cache_entry( delta_ids: Optional[StateMap[str]] = None for old_group, old_state in state_groups_ids.items(): + if old_state.keys() - new_state.keys(): + # Currently we don't support deltas that remove keys from the state + # map, so we have to ignore this group as a candidate to base the + # new group on. + continue + n_delta_ids = {k: v for k, v in new_state.items() if old_state.get(k) != v} if not delta_ids or len(n_delta_ids) < len(delta_ids): prev_group = old_group diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index af65e5913b..cf98b0ab48 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -948,7 +948,8 @@ class EventsPersistenceStorageController: events_context, ) - return res.state, None, new_latest_event_ids + full_state = await res.get_state(self._state_controller) + return full_state, None, new_latest_event_ids async def _prune_extremities( self, diff --git a/tests/test_state.py b/tests/test_state.py index 6ca8d8f21d..e2c0013671 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -21,7 +21,7 @@ from synapse.api.constants import EventTypes, Membership from synapse.api.room_versions import RoomVersions from synapse.events import make_event_from_dict from synapse.events.snapshot import EventContext -from synapse.state import StateHandler, StateResolutionHandler +from synapse.state import StateHandler, StateResolutionHandler, _make_state_cache_entry from synapse.util import Clock from synapse.util.macaroons import MacaroonGenerator @@ -760,3 +760,43 @@ class StateTestCase(unittest.TestCase): result = yield defer.ensureDeferred(self.state.compute_event_context(event)) return result + + def test_make_state_cache_entry(self): + "Test that calculating a prev_group and delta is correct" + + new_state = { + ("a", ""): "E", + ("b", ""): "E", + ("c", ""): "E", + ("d", ""): "E", + } + + # old_state_1 has fewer differences to new_state than old_state_2, but + # the delta involves deleting a key, which isn't allowed in the deltas, + # so we should pick old_state_2 as the prev_group. + + # `old_state_1` has two differences: `a` and `e` + old_state_1 = { + ("a", ""): "F", + ("b", ""): "E", + ("c", ""): "E", + ("d", ""): "E", + ("e", ""): "E", + } + + # `old_state_2` has three differences: `a`, `c` and `d` + old_state_2 = { + ("a", ""): "F", + ("b", ""): "E", + ("c", ""): "F", + ("d", ""): "F", + } + + entry = _make_state_cache_entry(new_state, {1: old_state_1, 2: old_state_2}) + + self.assertEqual(entry.prev_group, 2) + + # There are three changes from `old_state_2` to `new_state` + self.assertEqual( + entry.delta_ids, {("a", ""): "E", ("c", ""): "E", ("d", ""): "E"} + ) From b116d3ce00fa64c665c189e3fcc1cf7b41d2b115 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 15 Jul 2022 12:47:26 +0100 Subject: [PATCH 130/178] Bg update to populate new `events` table columns (#13215) These columns were added back in Synapse 1.52, and have been populated for new events since then. 
It's now (beyond) time to back-populate them for existing events. --- changelog.d/13215.misc | 1 + .../databases/main/events_bg_updates.py | 87 +++++++++++++++++++ .../delta/72/03bg_populate_events_columns.py | 47 ++++++++++ 3 files changed, 135 insertions(+) create mode 100644 changelog.d/13215.misc create mode 100644 synapse/storage/schema/main/delta/72/03bg_populate_events_columns.py diff --git a/changelog.d/13215.misc b/changelog.d/13215.misc new file mode 100644 index 0000000000..3da35addb3 --- /dev/null +++ b/changelog.d/13215.misc @@ -0,0 +1 @@ +Preparation for database schema simplifications: populate `state_key` and `rejection_reason` for existing rows in the `events` table. diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index eeca85fc94..6e8aeed7b4 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -67,6 +67,8 @@ class _BackgroundUpdates: EVENT_EDGES_DROP_INVALID_ROWS = "event_edges_drop_invalid_rows" EVENT_EDGES_REPLACE_INDEX = "event_edges_replace_index" + EVENTS_POPULATE_STATE_KEY_REJECTIONS = "events_populate_state_key_rejections" + @attr.s(slots=True, frozen=True, auto_attribs=True) class _CalculateChainCover: @@ -253,6 +255,11 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): replaces_index="ev_edges_id", ) + self.db_pool.updates.register_background_update_handler( + _BackgroundUpdates.EVENTS_POPULATE_STATE_KEY_REJECTIONS, + self._background_events_populate_state_key_rejections, + ) + async def _background_reindex_fields_sender( self, progress: JsonDict, batch_size: int ) -> int: @@ -1399,3 +1406,83 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): ) return batch_size + + async def _background_events_populate_state_key_rejections( + self, progress: JsonDict, batch_size: int + ) -> int: + """Back-populate `events.state_key` and `events.rejection_reason""" + + min_stream_ordering_exclusive = progress["min_stream_ordering_exclusive"] + max_stream_ordering_inclusive = progress["max_stream_ordering_inclusive"] + + def _populate_txn(txn: LoggingTransaction) -> bool: + """Returns True if we're done.""" + + # first we need to find an endpoint. + # we need to find the final row in the batch of batch_size, which means + # we need to skip over (batch_size-1) rows and get the next row. + txn.execute( + """ + SELECT stream_ordering FROM events + WHERE stream_ordering > ? AND stream_ordering <= ? + ORDER BY stream_ordering + LIMIT 1 OFFSET ? + """, + ( + min_stream_ordering_exclusive, + max_stream_ordering_inclusive, + batch_size - 1, + ), + ) + + endpoint = None + row = txn.fetchone() + if row: + endpoint = row[0] + + where_clause = "stream_ordering > ?" + args = [min_stream_ordering_exclusive] + if endpoint: + where_clause += " AND stream_ordering <= ?" + args.append(endpoint) + + # now do the updates. 
+ txn.execute( + f""" + UPDATE events + SET state_key = (SELECT state_key FROM state_events se WHERE se.event_id = events.event_id), + rejection_reason = (SELECT reason FROM rejections rej WHERE rej.event_id = events.event_id) + WHERE ({where_clause}) + """, + args, + ) + + logger.info( + "populated new `events` columns up to %s/%i: updated %i rows", + endpoint, + max_stream_ordering_inclusive, + txn.rowcount, + ) + + if endpoint is None: + # we're done + return True + + progress["min_stream_ordering_exclusive"] = endpoint + self.db_pool.updates._background_update_progress_txn( + txn, + _BackgroundUpdates.EVENTS_POPULATE_STATE_KEY_REJECTIONS, + progress, + ) + return False + + done = await self.db_pool.runInteraction( + desc="events_populate_state_key_rejections", func=_populate_txn + ) + + if done: + await self.db_pool.updates._end_background_update( + _BackgroundUpdates.EVENTS_POPULATE_STATE_KEY_REJECTIONS + ) + + return batch_size diff --git a/synapse/storage/schema/main/delta/72/03bg_populate_events_columns.py b/synapse/storage/schema/main/delta/72/03bg_populate_events_columns.py new file mode 100644 index 0000000000..55a5d092cc --- /dev/null +++ b/synapse/storage/schema/main/delta/72/03bg_populate_events_columns.py @@ -0,0 +1,47 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json + +from synapse.storage.types import Cursor + + +def run_create(cur: Cursor, database_engine, *args, **kwargs): + """Add a bg update to populate the `state_key` and `rejection_reason` columns of `events`""" + + # we know that any new events will have the columns populated (and that has been + # the case since schema_version 68, so there is no chance of rolling back now). + # + # So, we only need to make sure that existing rows are updated. We read the + # current min and max stream orderings, since that is guaranteed to include all + # the events that were stored before the new columns were added. + cur.execute("SELECT MIN(stream_ordering), MAX(stream_ordering) FROM events") + (min_stream_ordering, max_stream_ordering) = cur.fetchone() + + if min_stream_ordering is None: + # no rows, nothing to do. + return + + cur.execute( + "INSERT into background_updates (ordering, update_name, progress_json)" + " VALUES (7203, 'events_populate_state_key_rejections', ?)", + ( + json.dumps( + { + "min_stream_ordering_exclusive": min_stream_ordering - 1, + "max_stream_ordering_inclusive": max_stream_ordering, + } + ), + ), + ) From d765ada84f42d57cda2a3b413df160d65fbb8761 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Fri, 15 Jul 2022 13:18:51 +0100 Subject: [PATCH 131/178] Update locked frozendict version to 2.3.2 (#13284) `frozendict` 2.3.2 includes a fix for a memory leak in `frozendict.__hash__`. This likely has no impact outside of the deprecated `/initialSync` endpoint, which uses `StreamToken`s, containing `RoomStreamToken`s, containing `frozendict`s, as cache keys. 
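As a hedged aside on why `__hash__` is on the hot path here: `frozendict` (unlike `dict`) is hashable, so objects built from it can be used directly as cache keys, and every key lookup invokes `frozendict.__hash__`. The snippet below is a standalone illustration (assuming `frozendict` is installed), not code from Synapse.

```python
from frozendict import frozendict

# frozendict is hashable, so it can live inside dictionary/cache keys;
# each lookup below calls frozendict.__hash__, the method whose memory
# leak is fixed in frozendict 2.3.2.
instance_map = frozendict({"worker1": 42})
cache = {instance_map: "cached response"}

# An equal frozendict hashes to the same value, so the lookup succeeds.
assert cache[frozendict({"worker1": 42})] == "cached response"
```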
Signed-off-by: Sean Quah --- changelog.d/13284.misc | 1 + poetry.lock | 36 ++++++++++++++++++------------------ 2 files changed, 19 insertions(+), 18 deletions(-) create mode 100644 changelog.d/13284.misc diff --git a/changelog.d/13284.misc b/changelog.d/13284.misc new file mode 100644 index 0000000000..fa9743a10e --- /dev/null +++ b/changelog.d/13284.misc @@ -0,0 +1 @@ +Update locked version of `frozendict` to 2.3.2, which has a fix for a memory leak. diff --git a/poetry.lock b/poetry.lock index b7c0a6869a..3a08c9478d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -290,7 +290,7 @@ importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} [[package]] name = "frozendict" -version = "2.3.0" +version = "2.3.2" description = "A simple immutable dictionary" category = "main" optional = false @@ -1753,23 +1753,23 @@ flake8-comprehensions = [ {file = "flake8_comprehensions-3.8.0-py3-none-any.whl", hash = "sha256:9406314803abe1193c064544ab14fdc43c58424c0882f6ff8a581eb73fc9bb58"}, ] frozendict = [ - {file = "frozendict-2.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e18e2abd144a9433b0a8334582843b2aa0d3b9ac8b209aaa912ad365115fe2e1"}, - {file = "frozendict-2.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96dc7a02e78da5725e5e642269bb7ae792e0c9f13f10f2e02689175ebbfedb35"}, - {file = "frozendict-2.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:752a6dcfaf9bb20a7ecab24980e4dbe041f154509c989207caf185522ef85461"}, - {file = "frozendict-2.3.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:5346d9fc1c936c76d33975a9a9f1a067342963105d9a403a99e787c939cc2bb2"}, - {file = "frozendict-2.3.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60dd2253f1bacb63a7c486ec541a968af4f985ffb06602ee8954a3d39ec6bd2e"}, - {file = "frozendict-2.3.0-cp36-cp36m-win_amd64.whl", hash = "sha256:b2e044602ce17e5cd86724add46660fb9d80169545164e763300a3b839cb1b79"}, - {file = "frozendict-2.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a27a69b1ac3591e4258325108aee62b53c0eeb6ad0a993ae68d3c7eaea980420"}, - {file = "frozendict-2.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f45ef5f6b184d84744fff97b61f6b9a855e24d36b713ea2352fc723a047afa5"}, - {file = "frozendict-2.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:2d3f5016650c0e9a192f5024e68fb4d63f670d0ee58b099ed3f5b4c62ea30ecb"}, - {file = "frozendict-2.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6cf605916f50aabaaba5624c81eb270200f6c2c466c46960237a125ec8fe3ae0"}, - {file = "frozendict-2.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6da06e44904beae4412199d7e49be4f85c6cc168ab06b77c735ea7da5ce3454"}, - {file = "frozendict-2.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:1f34793fb409c4fa70ffd25bea87b01f3bd305fb1c6b09e7dff085b126302206"}, - {file = "frozendict-2.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fd72494a559bdcd28aa71f4aa81860269cd0b7c45fff3e2614a0a053ecfd2a13"}, - {file = "frozendict-2.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00ea9166aa68cc5feed05986206fdbf35e838a09cb3feef998cf35978ff8a803"}, - {file = "frozendict-2.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:9ffaf440648b44e0bc694c1a4701801941378ba3ba6541e17750ae4b4aeeb116"}, - {file = "frozendict-2.3.0-py3-none-any.whl", hash = "sha256:8578fe06815fcdcc672bd5603eebc98361a5317c1c3a13b28c6c810f6ea3b323"}, - {file = "frozendict-2.3.0.tar.gz", hash = "sha256:da4231adefc5928e7810da2732269d3ad7b5616295b3e693746392a8205ea0b5"}, + {file 
= "frozendict-2.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4fb171d1e84d17335365877e19d17440373b47ca74a73c06f65ac0b16d01e87f"}, + {file = "frozendict-2.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0a3640e9d7533d164160b758351aa49d9e85bbe0bd76d219d4021e90ffa6a52"}, + {file = "frozendict-2.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:87cfd00fafbc147d8cd2590d1109b7db8ac8d7d5bdaa708ba46caee132b55d4d"}, + {file = "frozendict-2.3.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:fb09761e093cfabb2f179dbfdb2521e1ec5701df714d1eb5c51fa7849027be19"}, + {file = "frozendict-2.3.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82176dc7adf01cf8f0193e909401939415a230a1853f4a672ec1629a06ceae18"}, + {file = "frozendict-2.3.2-cp36-cp36m-win_amd64.whl", hash = "sha256:c1c70826aa4a50fa283fe161834ac4a3ac7c753902c980bb8b595b0998a38ddb"}, + {file = "frozendict-2.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:1db5035ddbed995badd1a62c4102b5e207b5aeb24472df2c60aba79639d7996b"}, + {file = "frozendict-2.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4246fc4cb1413645ba4d3513939b90d979a5bae724be605a10b2b26ee12f839c"}, + {file = "frozendict-2.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:680cd42fb0a255da1ce45678ccbd7f69da750d5243809524ebe8f45b2eda6e6b"}, + {file = "frozendict-2.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6a7f3a181d6722c92a9fab12d0c5c2b006a18ca5666098531f316d1e1c8984e3"}, + {file = "frozendict-2.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1cb866eabb3c1384a7fe88e1e1033e2b6623073589012ab637c552bf03f6364"}, + {file = "frozendict-2.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:952c5e5e664578c5c2ce8489ee0ab6a1855da02b58ef593ee728fc10d672641a"}, + {file = "frozendict-2.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:608b77904cd0117cd816df605a80d0043a5326ee62529327d2136c792165a823"}, + {file = "frozendict-2.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0eed41fd326f0bcc779837d8d9e1374da1bc9857fe3b9f2910195bbd5fff3aeb"}, + {file = "frozendict-2.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:bde28db6b5868dd3c45b3555f9d1dc5a1cca6d93591502fa5dcecce0dde6a335"}, + {file = "frozendict-2.3.2-py3-none-any.whl", hash = "sha256:6882a9bbe08ab9b5ff96ce11bdff3fe40b114b9813bc6801261e2a7b45e20012"}, + {file = "frozendict-2.3.2.tar.gz", hash = "sha256:7fac4542f0a13fbe704db4942f41ba3abffec5af8b100025973e59dff6a09d0d"}, ] gitdb = [ {file = "gitdb-4.0.9-py3-none-any.whl", hash = "sha256:8033ad4e853066ba6ca92050b9df2f89301b8fc8bf7e9324d412a63f8bf1a8fd"}, From 7281591f4cf81db2fa9e00187d9a91179c6e6a98 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Fri, 15 Jul 2022 13:20:47 +0100 Subject: [PATCH 132/178] Use state before join to determine if we `_should_perform_remote_join` (#13270) Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- changelog.d/13270.bugfix | 1 + synapse/events/builder.py | 2 +- synapse/handlers/room_member.py | 35 +++++++++++++++++++-------------- synapse/state/__init__.py | 21 ++++++++++++-------- 4 files changed, 35 insertions(+), 24 deletions(-) create mode 100644 changelog.d/13270.bugfix diff --git a/changelog.d/13270.bugfix b/changelog.d/13270.bugfix new file mode 100644 index 0000000000..d023b25eea --- /dev/null +++ b/changelog.d/13270.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.40 where a user invited to a restricted room would be briefly unable to join. 
diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 98c203ada0..4caf6cbdee 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -120,7 +120,7 @@ class EventBuilder: The signed and hashed event. """ if auth_event_ids is None: - state_ids = await self._state.get_current_state_ids( + state_ids = await self._state.compute_state_after_events( self.room_id, prev_event_ids ) auth_event_ids = self._event_auth_handler.compute_auth_events( diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 90e0b21600..a5b9ac904e 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -755,14 +755,14 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): latest_event_ids = await self.store.get_prev_events_for_room(room_id) - current_state_ids = await self.state_handler.get_current_state_ids( - room_id, latest_event_ids=latest_event_ids + state_before_join = await self.state_handler.compute_state_after_events( + room_id, latest_event_ids ) # TODO: Refactor into dictionary of explicitly allowed transitions # between old and new state, with specific error messages for some # transitions and generic otherwise - old_state_id = current_state_ids.get((EventTypes.Member, target.to_string())) + old_state_id = state_before_join.get((EventTypes.Member, target.to_string())) if old_state_id: old_state = await self.store.get_event(old_state_id, allow_none=True) old_membership = old_state.content.get("membership") if old_state else None @@ -813,11 +813,11 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): if action == "kick": raise AuthError(403, "The target user is not in the room") - is_host_in_room = await self._is_host_in_room(current_state_ids) + is_host_in_room = await self._is_host_in_room(state_before_join) if effective_membership_state == Membership.JOIN: if requester.is_guest: - guest_can_join = await self._can_guest_join(current_state_ids) + guest_can_join = await self._can_guest_join(state_before_join) if not guest_can_join: # This should be an auth check, but guests are a local concept, # so don't really fit into the general auth process. @@ -855,7 +855,12 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): # Check if a remote join should be performed. remote_join, remote_room_hosts = await self._should_perform_remote_join( - target.to_string(), room_id, remote_room_hosts, content, is_host_in_room + target.to_string(), + room_id, + remote_room_hosts, + content, + is_host_in_room, + state_before_join, ) if remote_join: if ratelimit: @@ -995,6 +1000,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): remote_room_hosts: List[str], content: JsonDict, is_host_in_room: bool, + state_before_join: StateMap[str], ) -> Tuple[bool, List[str]]: """ Check whether the server should do a remote join (as opposed to a local @@ -1014,6 +1020,8 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): content: The content to use as the event body of the join. This may be modified. is_host_in_room: True if the host is in the room. + state_before_join: The state before the join event (i.e. the resolution of + the states after its parent events). Returns: A tuple of: @@ -1030,20 +1038,17 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): # If the host is in the room, but not one of the authorised hosts # for restricted join rules, a remote join must be used. 
room_version = await self.store.get_room_version(room_id) - current_state_ids = await self._storage_controllers.state.get_current_state_ids( - room_id - ) # If restricted join rules are not being used, a local join can always # be used. if not await self.event_auth_handler.has_restricted_join_rules( - current_state_ids, room_version + state_before_join, room_version ): return False, [] # If the user is invited to the room or already joined, the join # event can always be issued locally. - prev_member_event_id = current_state_ids.get((EventTypes.Member, user_id), None) + prev_member_event_id = state_before_join.get((EventTypes.Member, user_id), None) prev_member_event = None if prev_member_event_id: prev_member_event = await self.store.get_event(prev_member_event_id) @@ -1058,10 +1063,10 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): # # If not, generate a new list of remote hosts based on which # can issue invites. - event_map = await self.store.get_events(current_state_ids.values()) + event_map = await self.store.get_events(state_before_join.values()) current_state = { state_key: event_map[event_id] - for state_key, event_id in current_state_ids.items() + for state_key, event_id in state_before_join.items() } allowed_servers = get_servers_from_users( get_users_which_can_issue_invite(current_state) @@ -1075,7 +1080,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): # Ensure the member should be allowed access via membership in a room. await self.event_auth_handler.check_restricted_join_rules( - current_state_ids, room_version, user_id, prev_member_event + state_before_join, room_version, user_id, prev_member_event ) # If this is going to be a local join, additional information must @@ -1085,7 +1090,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): EventContentFields.AUTHORISING_USER ] = await self.event_auth_handler.get_user_which_could_invite( room_id, - current_state_ids, + state_before_join, ) return False, [] diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 3a65bd0849..56606e9afb 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -153,22 +153,27 @@ class StateHandler: ReplicationUpdateCurrentStateRestServlet.make_client(hs) ) - async def get_current_state_ids( + async def compute_state_after_events( self, room_id: str, - latest_event_ids: Collection[str], + event_ids: Collection[str], ) -> StateMap[str]: - """Get the current state, or the state at a set of events, for a room + """Fetch the state after each of the given event IDs. Resolve them and return. + + This is typically used where `event_ids` is a collection of forward extremities + in a room, intended to become the `prev_events` of a new event E. If so, the + return value of this function represents the state before E. Args: - room_id: - latest_event_ids: The forward extremities to resolve. + room_id: the room_id containing the given events. + event_ids: the events whose state should be fetched and resolved. Returns: - the state dict, mapping from (event_type, state_key) -> event_id + the state dict (a mapping from (event_type, state_key) -> event_id) which + holds the resolution of the states after the given event IDs. 
""" - logger.debug("calling resolve_state_groups from get_current_state_ids") - ret = await self.resolve_state_groups_for_events(room_id, latest_event_ids) + logger.debug("calling resolve_state_groups from compute_state_after_events") + ret = await self.resolve_state_groups_for_events(room_id, event_ids) return await ret.get_state(self._state_storage_controller, StateFilter.all()) async def get_current_users_in_room( From 3343035a06c6e4c5507d3acbd335bb8d4743d9b2 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 15 Jul 2022 08:22:43 -0400 Subject: [PATCH 133/178] Use a real room in the notification rotation tests. (#13260) Instead of manually inserting fake data. This fixes some issues with having to manually calculate stream orderings and other oddities. --- changelog.d/13260.misc | 1 + tests/storage/test_event_push_actions.py | 193 +++++++++-------------- 2 files changed, 79 insertions(+), 115 deletions(-) create mode 100644 changelog.d/13260.misc diff --git a/changelog.d/13260.misc b/changelog.d/13260.misc new file mode 100644 index 0000000000..b55ff32c76 --- /dev/null +++ b/changelog.d/13260.misc @@ -0,0 +1 @@ +Clean-up tests for notifications. diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index e8c53f16d9..ba40124c8a 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from unittest.mock import Mock - from twisted.test.proto_helpers import MemoryReactor +from synapse.rest import admin +from synapse.rest.client import login, room from synapse.server import HomeServer from synapse.storage.databases.main.event_push_actions import NotifCounts from synapse.util import Clock @@ -24,15 +24,14 @@ from tests.unittest import HomeserverTestCase USER_ID = "@user:example.com" -PlAIN_NOTIF = ["notify", {"set_tweak": "highlight", "value": False}] -HIGHLIGHT = [ - "notify", - {"set_tweak": "sound", "value": "default"}, - {"set_tweak": "highlight"}, -] - class EventPushActionsStoreTestCase(HomeserverTestCase): + servlets = [ + admin.register_servlets, + room.register_servlets, + login.register_servlets, + ] + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.store = hs.get_datastores().main persist_events_store = hs.get_datastores().persist_events @@ -54,154 +53,118 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): ) def test_count_aggregation(self) -> None: - room_id = "!foo:example.com" - user_id = "@user1235:test" + # Create a user to receive notifications and send receipts. + user_id = self.register_user("user1235", "pass") + token = self.login("user1235", "pass") - last_read_stream_ordering = [0] + # And another users to send events. + other_id = self.register_user("other", "pass") + other_token = self.login("other", "pass") - def _assert_counts(noitf_count: int, highlight_count: int) -> None: + # Create a room and put both users in it. 
+ room_id = self.helper.create_room_as(user_id, tok=token) + self.helper.join(room_id, other_id, tok=other_token) + + last_event_id: str + + def _assert_counts( + noitf_count: int, unread_count: int, highlight_count: int + ) -> None: counts = self.get_success( self.store.db_pool.runInteraction( - "", - self.store._get_unread_counts_by_pos_txn, + "get-unread-counts", + self.store._get_unread_counts_by_receipt_txn, room_id, user_id, - last_read_stream_ordering[0], ) ) self.assertEqual( counts, NotifCounts( notify_count=noitf_count, - unread_count=0, # Unread counts are tested in the sync tests. + unread_count=unread_count, highlight_count=highlight_count, ), ) - def _inject_actions(stream: int, action: list) -> None: - event = Mock() - event.room_id = room_id - event.event_id = f"$test{stream}:example.com" - event.internal_metadata.stream_ordering = stream - event.internal_metadata.is_outlier.return_value = False - event.depth = stream - - self.store._events_stream_cache.entity_has_changed(room_id, stream) - - self.get_success( - self.store.db_pool.simple_insert( - table="events", - values={ - "stream_ordering": stream, - "topological_ordering": stream, - "type": "m.room.message", - "room_id": room_id, - "processed": True, - "outlier": False, - "event_id": event.event_id, - }, - ) + def _create_event(highlight: bool = False) -> str: + result = self.helper.send_event( + room_id, + type="m.room.message", + content={"msgtype": "m.text", "body": user_id if highlight else "msg"}, + tok=other_token, ) + nonlocal last_event_id + last_event_id = result["event_id"] + return last_event_id - self.get_success( - self.store.add_push_actions_to_staging( - event.event_id, - {user_id: action}, - False, - ) - ) - self.get_success( - self.store.db_pool.runInteraction( - "", - self.persist_events_store._set_push_actions_for_event_and_users_txn, - [(event, None)], - [(event, None)], - ) - ) - - def _rotate(stream: int) -> None: - self.get_success( - self.store.db_pool.runInteraction( - "rotate-receipts", self.store._handle_new_receipts_for_notifs_txn - ) - ) - - self.get_success( - self.store.db_pool.runInteraction( - "rotate-notifs", self.store._rotate_notifs_before_txn, stream - ) - ) - - def _mark_read(stream: int, depth: int) -> None: - last_read_stream_ordering[0] = stream + def _rotate() -> None: + self.get_success(self.store._rotate_notifs()) + def _mark_read(event_id: str) -> None: self.get_success( self.store.insert_receipt( room_id, "m.read", user_id=user_id, - event_ids=[f"$test{stream}:example.com"], + event_ids=[event_id], data={}, ) ) - _assert_counts(0, 0) - _inject_actions(1, PlAIN_NOTIF) - _assert_counts(1, 0) - _rotate(1) - _assert_counts(1, 0) + _assert_counts(0, 0, 0) + _create_event() + _assert_counts(1, 1, 0) + _rotate() + _assert_counts(1, 1, 0) - _inject_actions(3, PlAIN_NOTIF) - _assert_counts(2, 0) - _rotate(3) - _assert_counts(2, 0) + event_id = _create_event() + _assert_counts(2, 2, 0) + _rotate() + _assert_counts(2, 2, 0) - _inject_actions(5, PlAIN_NOTIF) - _mark_read(3, 3) - _assert_counts(1, 0) + _create_event() + _mark_read(event_id) + _assert_counts(1, 1, 0) - _mark_read(5, 5) - _assert_counts(0, 0) + _mark_read(last_event_id) + _assert_counts(0, 0, 0) - _inject_actions(6, PlAIN_NOTIF) - _rotate(6) - _assert_counts(1, 0) + _create_event() + _rotate() + _assert_counts(1, 1, 0) - self.get_success( - self.store.db_pool.simple_delete( - table="event_push_actions", keyvalues={"1": 1}, desc="" - ) - ) + # Delete old event push actions, this should not affect the (summarised) count. 
+ self.get_success(self.store._remove_old_push_actions_that_have_rotated()) + _assert_counts(1, 1, 0) - _assert_counts(1, 0) + _mark_read(last_event_id) + _assert_counts(0, 0, 0) - _mark_read(6, 6) - _assert_counts(0, 0) - - _inject_actions(8, HIGHLIGHT) - _assert_counts(1, 1) - _rotate(8) - _assert_counts(1, 1) + event_id = _create_event(True) + _assert_counts(1, 1, 1) + _rotate() + _assert_counts(1, 1, 1) # Check that adding another notification and rotating after highlight # works. - _inject_actions(10, PlAIN_NOTIF) - _rotate(10) - _assert_counts(2, 1) + _create_event() + _rotate() + _assert_counts(2, 2, 1) # Check that sending read receipts at different points results in the # right counts. - _mark_read(8, 8) - _assert_counts(1, 0) - _mark_read(10, 10) - _assert_counts(0, 0) + _mark_read(event_id) + _assert_counts(1, 1, 0) + _mark_read(last_event_id) + _assert_counts(0, 0, 0) - _inject_actions(11, HIGHLIGHT) - _assert_counts(1, 1) - _mark_read(11, 11) - _assert_counts(0, 0) - _rotate(11) - _assert_counts(0, 0) + _create_event(True) + _assert_counts(1, 1, 1) + _mark_read(last_event_id) + _assert_counts(0, 0, 0) + _rotate() + _assert_counts(0, 0, 0) def test_find_first_stream_ordering_after_ts(self) -> None: def add_event(so: int, ts: int) -> None: From 0731e0829c08aec7f31fdc72c236757e4cc38747 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 15 Jul 2022 13:59:45 +0100 Subject: [PATCH 134/178] Don't pull out the full state when storing state (#13274) --- changelog.d/13274.misc | 1 + synapse/state/__init__.py | 36 +++--- synapse/storage/controllers/state.py | 2 +- synapse/storage/databases/state/store.py | 152 +++++++++++++++-------- tests/rest/client/test_rooms.py | 4 +- tests/test_state.py | 4 + 6 files changed, 130 insertions(+), 69 deletions(-) create mode 100644 changelog.d/13274.misc diff --git a/changelog.d/13274.misc b/changelog.d/13274.misc new file mode 100644 index 0000000000..a334414320 --- /dev/null +++ b/changelog.d/13274.misc @@ -0,0 +1 @@ +Don't pull out state in `compute_event_context` for unconflicted state. diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 56606e9afb..fcb7e829d4 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -298,12 +298,18 @@ class StateHandler: state_group_before_event_prev_group = entry.prev_group deltas_to_state_group_before_event = entry.delta_ids + state_ids_before_event = None # We make sure that we have a state group assigned to the state. if entry.state_group is None: - state_ids_before_event = await entry.get_state( - self._state_storage_controller, StateFilter.all() - ) + # store_state_group requires us to have either a previous state group + # (with deltas) or the complete state map. So, if we don't have a + # previous state group, load the complete state map now. 
+ if state_group_before_event_prev_group is None: + state_ids_before_event = await entry.get_state( + self._state_storage_controller, StateFilter.all() + ) + state_group_before_event = ( await self._state_storage_controller.store_state_group( event.event_id, @@ -316,7 +322,6 @@ class StateHandler: entry.state_group = state_group_before_event else: state_group_before_event = entry.state_group - state_ids_before_event = None # # now if it's not a state event, we're done @@ -336,19 +341,20 @@ class StateHandler: # # otherwise, we'll need to create a new state group for after the event # - if state_ids_before_event is None: - state_ids_before_event = await entry.get_state( - self._state_storage_controller, StateFilter.all() - ) key = (event.type, event.state_key) - if key in state_ids_before_event: - replaces = state_ids_before_event[key] - if replaces != event.event_id: - event.unsigned["replaces_state"] = replaces - state_ids_after_event = dict(state_ids_before_event) - state_ids_after_event[key] = event.event_id + if state_ids_before_event is not None: + replaces = state_ids_before_event.get(key) + else: + replaces_state_map = await entry.get_state( + self._state_storage_controller, StateFilter.from_types([key]) + ) + replaces = replaces_state_map.get(key) + + if replaces and replaces != event.event_id: + event.unsigned["replaces_state"] = replaces + delta_ids = {key: event.event_id} state_group_after_event = ( @@ -357,7 +363,7 @@ class StateHandler: event.room_id, prev_group=state_group_before_event, delta_ids=delta_ids, - current_state_ids=state_ids_after_event, + current_state_ids=None, ) ) diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index d3a44bc876..e08f956e6e 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -346,7 +346,7 @@ class StateStorageController: room_id: str, prev_group: Optional[int], delta_ids: Optional[StateMap[str]], - current_state_ids: StateMap[str], + current_state_ids: Optional[StateMap[str]], ) -> int: """Store a new set of state, returning a newly assigned state group. diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index 609a2b88bf..afbc85ad0c 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -400,14 +400,17 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): room_id: str, prev_group: Optional[int], delta_ids: Optional[StateMap[str]], - current_state_ids: StateMap[str], + current_state_ids: Optional[StateMap[str]], ) -> int: """Store a new set of state, returning a newly assigned state group. + At least one of `current_state_ids` and `prev_group` must be provided. Whenever + `prev_group` is not None, `delta_ids` must also not be None. + Args: event_id: The event ID for which the state was calculated room_id - prev_group: A previous state group for the room, optional. + prev_group: A previous state group for the room. delta_ids: The delta between state at `prev_group` and `current_state_ids`, if `prev_group` was given. Same format as `current_state_ids`. 
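Related to the `StateFilter.from_types([key])` lookup above: instead of materialising the whole state map just to find the event being replaced, only the single (type, state_key) pair is requested. A minimal stand-in for what that filter asks of the storage layer (hypothetical function, for illustration only):

```python
from typing import Dict, Iterable, Tuple

StateKey = Tuple[str, str]
StateMap = Dict[StateKey, str]


def filter_state(state: StateMap, wanted: Iterable[StateKey]) -> StateMap:
    """Keep only the requested (event type, state key) pairs."""
    wanted_set = set(wanted)
    return {key: event_id for key, event_id in state.items() if key in wanted_set}


state = {
    ("m.room.name", ""): "$old_name",
    ("m.room.member", "@alice:example.org"): "$alice_join",
}
# To compute `replaces` for a new m.room.name event, one key is enough.
assert filter_state(state, [("m.room.name", "")]) == {("m.room.name", ""): "$old_name"}
```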
@@ -418,10 +421,41 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): The state group ID """ - def _store_state_group_txn(txn: LoggingTransaction) -> int: - if current_state_ids is None: - # AFAIK, this can never happen - raise Exception("current_state_ids cannot be None") + if prev_group is None and current_state_ids is None: + raise Exception("current_state_ids and prev_group can't both be None") + + if prev_group is not None and delta_ids is None: + raise Exception("delta_ids is None when prev_group is not None") + + def insert_delta_group_txn( + txn: LoggingTransaction, prev_group: int, delta_ids: StateMap[str] + ) -> Optional[int]: + """Try and persist the new group as a delta. + + Requires that we have the state as a delta from a previous state group. + + Returns: + The state group if successfully created, or None if the state + needs to be persisted as a full state. + """ + is_in_db = self.db_pool.simple_select_one_onecol_txn( + txn, + table="state_groups", + keyvalues={"id": prev_group}, + retcol="id", + allow_none=True, + ) + if not is_in_db: + raise Exception( + "Trying to persist state with unpersisted prev_group: %r" + % (prev_group,) + ) + + # if the chain of state group deltas is going too long, we fall back to + # persisting a complete state group. + potential_hops = self._count_state_group_hops_txn(txn, prev_group) + if potential_hops >= MAX_STATE_DELTA_HOPS: + return None state_group = self._state_group_seq_gen.get_next_id_txn(txn) @@ -431,51 +465,45 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): values={"id": state_group, "room_id": room_id, "event_id": event_id}, ) - # We persist as a delta if we can, while also ensuring the chain - # of deltas isn't tooo long, as otherwise read performance degrades. 
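The hop check above exists because a delta group is resolved by walking back to an ancestor that holds full state; if that chain grows without bound, reads get slower. A toy in-memory model of the walk (the real `_count_state_group_hops_txn` does this with a recursive SQL query, and the cut-off value below is illustrative, not Synapse's actual constant):

```python
from typing import Dict, Optional

MAX_STATE_DELTA_HOPS = 100  # illustrative limit; Synapse defines its own value


def count_hops(prev_edges: Dict[int, int], group: int) -> int:
    """Follow prev_state_group edges until a full (non-delta) group is reached."""
    hops = 0
    current: Optional[int] = group
    while current in prev_edges:
        current = prev_edges[current]
        hops += 1
    return hops


def should_store_as_delta(prev_edges: Dict[int, int], prev_group: int) -> bool:
    # Mirrors the fallback: once the chain is long enough, persist full state.
    return count_hops(prev_edges, prev_group) < MAX_STATE_DELTA_HOPS


# Group 3 is a delta on 2, which is a delta on 1; group 1 holds full state.
prev_edges = {3: 2, 2: 1}
assert count_hops(prev_edges, 3) == 2
assert count_hops(prev_edges, 1) == 0
assert should_store_as_delta(prev_edges, 3)
```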
- if prev_group: - is_in_db = self.db_pool.simple_select_one_onecol_txn( - txn, - table="state_groups", - keyvalues={"id": prev_group}, - retcol="id", - allow_none=True, - ) - if not is_in_db: - raise Exception( - "Trying to persist state with unpersisted prev_group: %r" - % (prev_group,) - ) + self.db_pool.simple_insert_txn( + txn, + table="state_group_edges", + values={"state_group": state_group, "prev_state_group": prev_group}, + ) - potential_hops = self._count_state_group_hops_txn(txn, prev_group) - if prev_group and potential_hops < MAX_STATE_DELTA_HOPS: - assert delta_ids is not None + self.db_pool.simple_insert_many_txn( + txn, + table="state_groups_state", + keys=("state_group", "room_id", "type", "state_key", "event_id"), + values=[ + (state_group, room_id, key[0], key[1], state_id) + for key, state_id in delta_ids.items() + ], + ) - self.db_pool.simple_insert_txn( - txn, - table="state_group_edges", - values={"state_group": state_group, "prev_state_group": prev_group}, - ) + return state_group - self.db_pool.simple_insert_many_txn( - txn, - table="state_groups_state", - keys=("state_group", "room_id", "type", "state_key", "event_id"), - values=[ - (state_group, room_id, key[0], key[1], state_id) - for key, state_id in delta_ids.items() - ], - ) - else: - self.db_pool.simple_insert_many_txn( - txn, - table="state_groups_state", - keys=("state_group", "room_id", "type", "state_key", "event_id"), - values=[ - (state_group, room_id, key[0], key[1], state_id) - for key, state_id in current_state_ids.items() - ], - ) + def insert_full_state_txn( + txn: LoggingTransaction, current_state_ids: StateMap[str] + ) -> int: + """Persist the full state, returning the new state group.""" + state_group = self._state_group_seq_gen.get_next_id_txn(txn) + + self.db_pool.simple_insert_txn( + txn, + table="state_groups", + values={"id": state_group, "room_id": room_id, "event_id": event_id}, + ) + + self.db_pool.simple_insert_many_txn( + txn, + table="state_groups_state", + keys=("state_group", "room_id", "type", "state_key", "event_id"), + values=[ + (state_group, room_id, key[0], key[1], state_id) + for key, state_id in current_state_ids.items() + ], + ) # Prefill the state group caches with this group. # It's fine to use the sequence like this as the state group map @@ -491,7 +519,7 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): self._state_group_members_cache.update, self._state_group_members_cache.sequence, key=state_group, - value=dict(current_member_state_ids), + value=current_member_state_ids, ) current_non_member_state_ids = { @@ -503,13 +531,35 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): self._state_group_cache.update, self._state_group_cache.sequence, key=state_group, - value=dict(current_non_member_state_ids), + value=current_non_member_state_ids, ) return state_group + if prev_group is not None: + state_group = await self.db_pool.runInteraction( + "store_state_group.insert_delta_group", + insert_delta_group_txn, + prev_group, + delta_ids, + ) + if state_group is not None: + return state_group + + # We're going to persist the state as a complete group rather than + # a delta, so first we need to ensure we have loaded the state map + # from the database. 
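For the cache prefill shown above, the state map is split into membership entries and everything else, because the two are kept in separate state-group caches. The partitioning itself is just a pair of dict comprehensions; roughly (self-contained sketch, with the member event type written out literally rather than via `EventTypes.Member`):

```python
from typing import Dict, Tuple

StateMap = Dict[Tuple[str, str], str]
MEMBER_EVENT_TYPE = "m.room.member"


def split_member_state(state: StateMap) -> Tuple[StateMap, StateMap]:
    """Partition a state map into (membership entries, everything else)."""
    members = {k: v for k, v in state.items() if k[0] == MEMBER_EVENT_TYPE}
    non_members = {k: v for k, v in state.items() if k[0] != MEMBER_EVENT_TYPE}
    return members, non_members


state = {
    ("m.room.member", "@alice:example.org"): "$alice_join",
    ("m.room.name", ""): "$name",
}
members, non_members = split_member_state(state)
assert members == {("m.room.member", "@alice:example.org"): "$alice_join"}
assert non_members == {("m.room.name", ""): "$name"}
```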
+ if current_state_ids is None: + assert prev_group is not None + assert delta_ids is not None + groups = await self._get_state_for_groups([prev_group]) + current_state_ids = dict(groups[prev_group]) + current_state_ids.update(delta_ids) + return await self.db_pool.runInteraction( - "store_state_group", _store_state_group_txn + "store_state_group.insert_full_state", + insert_full_state_txn, + current_state_ids, ) async def purge_unreferenced_state_groups( diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 8ed5272b16..06221b806a 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -709,7 +709,7 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(200, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(32, channel.resource_usage.db_txn_count) + self.assertEqual(36, channel.resource_usage.db_txn_count) def test_post_room_initial_state(self) -> None: # POST with initial_state config key, expect new room id @@ -722,7 +722,7 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(200, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(35, channel.resource_usage.db_txn_count) + self.assertEqual(40, channel.resource_usage.db_txn_count) def test_post_room_visibility_key(self) -> None: # POST with visibility config key, expect new room id diff --git a/tests/test_state.py b/tests/test_state.py index e2c0013671..bafd6d1750 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -99,6 +99,10 @@ class _DummyStore: state_group = self._next_group self._next_group += 1 + if current_state_ids is None: + current_state_ids = dict(self._group_to_state[prev_group]) + current_state_ids.update(delta_ids) + self._group_to_state[state_group] = dict(current_state_ids) return state_group From e9ce4d089bbb013f870bbc8d58ec796e8f315eb4 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Fri, 15 Jul 2022 16:18:47 +0100 Subject: [PATCH 135/178] Use and recommend poetry 1.1.14, up from 1.1.12 (#13285) --- .ci/scripts/test_old_deps.sh | 2 +- .github/workflows/twisted_trunk.yml | 4 ++-- changelog.d/13285.misc | 1 + docker/Dockerfile | 2 +- docs/development/dependencies.md | 25 +++++++++++++++++++++++++ 5 files changed, 30 insertions(+), 4 deletions(-) create mode 100644 changelog.d/13285.misc diff --git a/.ci/scripts/test_old_deps.sh b/.ci/scripts/test_old_deps.sh index 7d0625fa86..478c8d639a 100755 --- a/.ci/scripts/test_old_deps.sh +++ b/.ci/scripts/test_old_deps.sh @@ -69,7 +69,7 @@ with open('pyproject.toml', 'w') as f: " python3 -c "$REMOVE_DEV_DEPENDENCIES" -pipx install poetry==1.1.12 +pipx install poetry==1.1.14 ~/.local/bin/poetry lock echo "::group::Patched pyproject.toml" diff --git a/.github/workflows/twisted_trunk.yml b/.github/workflows/twisted_trunk.yml index f35e82297f..dd8e6fbb1c 100644 --- a/.github/workflows/twisted_trunk.yml +++ b/.github/workflows/twisted_trunk.yml @@ -127,12 +127,12 @@ jobs: run: | set -x DEBIAN_FRONTEND=noninteractive sudo apt-get install -yqq python3 pipx - pipx install poetry==1.1.12 + pipx install poetry==1.1.14 poetry remove -n twisted poetry add -n --extras tls git+https://github.com/twisted/twisted.git#trunk poetry lock --no-update - # NOT IN 1.1.12 poetry lock --check + # NOT IN 1.1.14 poetry lock --check working-directory: synapse - run: | diff --git a/changelog.d/13285.misc b/changelog.d/13285.misc new file mode 100644 index 
0000000000..b7bcbadb5b --- /dev/null +++ b/changelog.d/13285.misc @@ -0,0 +1 @@ +Upgrade from Poetry 1.1.14 to 1.1.12, to fix bugs when locking packages. diff --git a/docker/Dockerfile b/docker/Dockerfile index 22707ed142..f4d8e6c925 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -45,7 +45,7 @@ RUN \ # We install poetry in its own build stage to avoid its dependencies conflicting with # synapse's dependencies. -# We use a specific commit from poetry's master branch instead of our usual 1.1.12, +# We use a specific commit from poetry's master branch instead of our usual 1.1.14, # to incorporate fixes to some bugs in `poetry export`. This commit corresponds to # https://github.com/python-poetry/poetry/pull/5156 and # https://github.com/python-poetry/poetry/issues/5141 ; diff --git a/docs/development/dependencies.md b/docs/development/dependencies.md index 8ef7d357d8..236856a6b0 100644 --- a/docs/development/dependencies.md +++ b/docs/development/dependencies.md @@ -237,3 +237,28 @@ poetry run pip install build && poetry run python -m build because [`build`](https://github.com/pypa/build) is a standardish tool which doesn't require poetry. (It's what we use in CI too). However, you could try `poetry build` too. + + +# Troubleshooting + +## Check the version of poetry with `poetry --version`. + +At the time of writing, the 1.2 series is beta only. We have seen some examples +where the lockfiles generated by 1.2 prereleasese aren't interpreted correctly +by poetry 1.1.x. For now, use poetry 1.1.14, which includes a critical +[change](https://github.com/python-poetry/poetry/pull/5973) needed to remain +[compatible with PyPI](https://github.com/pypi/warehouse/pull/11775). + +It can also be useful to check the version of `poetry-core` in use. If you've +installed `poetry` with `pipx`, try `pipx runpip poetry list | grep poetry-core`. + +## Clear caches: `poetry cache clear --all pypi`. + +Poetry caches a bunch of information about packages that isn't readily available +from PyPI. (This is what makes poetry seem slow when doing the first +`poetry install`.) Try `poetry cache list` and `poetry cache clear --all +` to see if that fixes things. + +## Try `--verbose` or `--dry-run` arguments. + +Sometimes useful to see what poetry's internal logic is. From 7b67e93d499cb45f7217e9dfea046ed8b5c455fd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 15 Jul 2022 11:42:21 -0500 Subject: [PATCH 136/178] Provide more info why we don't have any thumbnails to serve (#13038) Fix https://github.com/matrix-org/synapse/issues/13016 ## New error code and status ### Before Previously, we returned a `404` for `/thumbnail` which isn't even in the spec. ```json { "errcode": "M_NOT_FOUND", "error": "Not found [b'hs1', b'tefQeZhmVxoiBfuFQUKRzJxc']" } ``` ### After What does the spec say? > 400: The request does not make sense to the server, or the server cannot thumbnail the content. For example, the client requested non-integer dimensions or asked for negatively-sized images. > > *-- https://spec.matrix.org/v1.1/client-server-api/#get_matrixmediav3thumbnailservernamemediaid* Now with this PR, we respond with a `400` when we don't have thumbnails to serve and we explain why we might not have any thumbnails. ```json { "errcode": "M_UNKNOWN", "error": "Cannot find any thumbnails for the requested media ([b'example.com', b'12345']). This might mean the media is not a supported_media_format=(image/jpeg, image/jpg, image/webp, image/gif, image/png) or that thumbnailing failed for some other reason. 
(Dynamic thumbnails are disabled on this server.)", } ``` > Cannot find any thumbnails for the requested media ([b'example.com', b'12345']). This might mean the media is not a supported_media_format=(image/jpeg, image/jpg, image/webp, image/gif, image/png) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.) --- We still respond with a 404 in many other places. But we can iterate on those later and maybe keep some in some specific places after spec updates/clarification: https://github.com/matrix-org/matrix-spec/issues/1122 We can also iterate on the bugs where Synapse doesn't thumbnail when it should in other issues/PRs. --- changelog.d/13038.feature | 1 + synapse/config/repository.py | 35 ++++++++--- synapse/rest/media/v1/thumbnail_resource.py | 40 +++++++++++- tests/rest/media/v1/test_media_storage.py | 70 ++++++++++++++++++--- 4 files changed, 129 insertions(+), 17 deletions(-) create mode 100644 changelog.d/13038.feature diff --git a/changelog.d/13038.feature b/changelog.d/13038.feature new file mode 100644 index 0000000000..1278f1b4e9 --- /dev/null +++ b/changelog.d/13038.feature @@ -0,0 +1 @@ +Provide more info why we don't have any thumbnails to serve. diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 3c69dd325f..1033496bb4 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -42,6 +42,18 @@ THUMBNAIL_SIZE_YAML = """\ # method: %(method)s """ +# A map from the given media type to the type of thumbnail we should generate +# for it. +THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP = { + "image/jpeg": "jpeg", + "image/jpg": "jpeg", + "image/webp": "jpeg", + # Thumbnails can only be jpeg or png. We choose png thumbnails for gif + # because it can have transparency. + "image/gif": "png", + "image/png": "png", +} + HTTP_PROXY_SET_WARNING = """\ The Synapse config url_preview_ip_range_blacklist will be ignored as an HTTP(s) proxy is configured.""" @@ -79,13 +91,22 @@ def parse_thumbnail_requirements( width = size["width"] height = size["height"] method = size["method"] - jpeg_thumbnail = ThumbnailRequirement(width, height, method, "image/jpeg") - png_thumbnail = ThumbnailRequirement(width, height, method, "image/png") - requirements.setdefault("image/jpeg", []).append(jpeg_thumbnail) - requirements.setdefault("image/jpg", []).append(jpeg_thumbnail) - requirements.setdefault("image/webp", []).append(jpeg_thumbnail) - requirements.setdefault("image/gif", []).append(png_thumbnail) - requirements.setdefault("image/png", []).append(png_thumbnail) + + for format, thumbnail_format in THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP.items(): + requirement = requirements.setdefault(format, []) + if thumbnail_format == "jpeg": + requirement.append( + ThumbnailRequirement(width, height, method, "image/jpeg") + ) + elif thumbnail_format == "png": + requirement.append( + ThumbnailRequirement(width, height, method, "image/png") + ) + else: + raise Exception( + "Unknown thumbnail mapping from %s to %s. This is a Synapse problem, please report!" 
+ % (format, thumbnail_format) + ) return { media_type: tuple(thumbnails) for media_type, thumbnails in requirements.items() } diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 2295adfaa7..5f725c7600 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -17,9 +17,11 @@ import logging from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple -from synapse.api.errors import SynapseError +from synapse.api.errors import Codes, SynapseError, cs_error +from synapse.config.repository import THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP from synapse.http.server import ( DirectServeJsonResource, + respond_with_json, set_corp_headers, set_cors_headers, ) @@ -309,6 +311,19 @@ class ThumbnailResource(DirectServeJsonResource): url_cache: True if this is from a URL cache. server_name: The server name, if this is a remote thumbnail. """ + logger.debug( + "_select_and_respond_with_thumbnail: media_id=%s desired=%sx%s (%s) thumbnail_infos=%s", + media_id, + desired_width, + desired_height, + desired_method, + thumbnail_infos, + ) + + # If `dynamic_thumbnails` is enabled, we expect Synapse to go down a + # different code path to handle it. + assert not self.dynamic_thumbnails + if thumbnail_infos: file_info = self._select_thumbnail( desired_width, @@ -384,8 +399,29 @@ class ThumbnailResource(DirectServeJsonResource): file_info.thumbnail.length, ) else: + # This might be because: + # 1. We can't create thumbnails for the given media (corrupted or + # unsupported file type), or + # 2. The thumbnailing process never ran or errored out initially + # when the media was first uploaded (these bugs should be + # reported and fixed). + # Note that we don't attempt to generate a thumbnail now because + # `dynamic_thumbnails` is disabled. logger.info("Failed to find any generated thumbnails") - respond_404(request) + + respond_with_json( + request, + 400, + cs_error( + "Cannot find any thumbnails for the requested media (%r). This might mean the media is not a supported_media_format=(%s) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)" + % ( + request.postpath, + ", ".join(THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP.keys()), + ), + code=Codes.UNKNOWN, + ), + send_cors=True, + ) def _select_thumbnail( self, diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index 79727c430f..d18fc13c21 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -126,7 +126,9 @@ class _TestImage: expected_scaled: The expected bytes from scaled thumbnailing, or None if test should just check for a valid image returned. expected_found: True if the file should exist on the server, or False if - a 404 is expected. + a 404/400 is expected. + unable_to_thumbnail: True if we expect the thumbnailing to fail (400), or + False if the thumbnailing should succeed or a normal 404 is expected. 
""" data: bytes @@ -135,6 +137,7 @@ class _TestImage: expected_cropped: Optional[bytes] = None expected_scaled: Optional[bytes] = None expected_found: bool = True + unable_to_thumbnail: bool = False @parameterized_class( @@ -192,6 +195,7 @@ class _TestImage: b"image/gif", b".gif", expected_found=False, + unable_to_thumbnail=True, ), ), ], @@ -366,18 +370,29 @@ class MediaRepoTests(unittest.HomeserverTestCase): def test_thumbnail_crop(self) -> None: """Test that a cropped remote thumbnail is available.""" self._test_thumbnail( - "crop", self.test_image.expected_cropped, self.test_image.expected_found + "crop", + self.test_image.expected_cropped, + expected_found=self.test_image.expected_found, + unable_to_thumbnail=self.test_image.unable_to_thumbnail, ) def test_thumbnail_scale(self) -> None: """Test that a scaled remote thumbnail is available.""" self._test_thumbnail( - "scale", self.test_image.expected_scaled, self.test_image.expected_found + "scale", + self.test_image.expected_scaled, + expected_found=self.test_image.expected_found, + unable_to_thumbnail=self.test_image.unable_to_thumbnail, ) def test_invalid_type(self) -> None: """An invalid thumbnail type is never available.""" - self._test_thumbnail("invalid", None, False) + self._test_thumbnail( + "invalid", + None, + expected_found=False, + unable_to_thumbnail=self.test_image.unable_to_thumbnail, + ) @unittest.override_config( {"thumbnail_sizes": [{"width": 32, "height": 32, "method": "scale"}]} @@ -386,7 +401,12 @@ class MediaRepoTests(unittest.HomeserverTestCase): """ Override the config to generate only scaled thumbnails, but request a cropped one. """ - self._test_thumbnail("crop", None, False) + self._test_thumbnail( + "crop", + None, + expected_found=False, + unable_to_thumbnail=self.test_image.unable_to_thumbnail, + ) @unittest.override_config( {"thumbnail_sizes": [{"width": 32, "height": 32, "method": "crop"}]} @@ -395,14 +415,22 @@ class MediaRepoTests(unittest.HomeserverTestCase): """ Override the config to generate only cropped thumbnails, but request a scaled one. """ - self._test_thumbnail("scale", None, False) + self._test_thumbnail( + "scale", + None, + expected_found=False, + unable_to_thumbnail=self.test_image.unable_to_thumbnail, + ) def test_thumbnail_repeated_thumbnail(self) -> None: """Test that fetching the same thumbnail works, and deleting the on disk thumbnail regenerates it. """ self._test_thumbnail( - "scale", self.test_image.expected_scaled, self.test_image.expected_found + "scale", + self.test_image.expected_scaled, + expected_found=self.test_image.expected_found, + unable_to_thumbnail=self.test_image.unable_to_thumbnail, ) if not self.test_image.expected_found: @@ -459,8 +487,24 @@ class MediaRepoTests(unittest.HomeserverTestCase): ) def _test_thumbnail( - self, method: str, expected_body: Optional[bytes], expected_found: bool + self, + method: str, + expected_body: Optional[bytes], + expected_found: bool, + unable_to_thumbnail: bool = False, ) -> None: + """Test the given thumbnailing method works as expected. + + Args: + method: The thumbnailing method to use (crop, scale). + expected_body: The expected bytes from thumbnailing, or None if + test should just check for a valid image. + expected_found: True if the file should exist on the server, or False if + a 404/400 is expected. + unable_to_thumbnail: True if we expect the thumbnailing to fail (400), or + False if the thumbnailing should succeed or a normal 404 is expected. 
+ """ + params = "?width=32&height=32&method=" + method channel = make_request( self.reactor, @@ -496,6 +540,16 @@ class MediaRepoTests(unittest.HomeserverTestCase): else: # ensure that the result is at least some valid image Image.open(BytesIO(channel.result["body"])) + elif unable_to_thumbnail: + # A 400 with a JSON body. + self.assertEqual(channel.code, 400) + self.assertEqual( + channel.json_body, + { + "errcode": "M_UNKNOWN", + "error": "Cannot find any thumbnails for the requested media ([b'example.com', b'12345']). This might mean the media is not a supported_media_format=(image/jpeg, image/jpg, image/webp, image/gif, image/png) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)", + }, + ) else: # A 404 with a JSON body. self.assertEqual(channel.code, 404) From 96cf81e312407f0caba1b45ba9899906b1dcc098 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Fri, 15 Jul 2022 21:31:27 +0200 Subject: [PATCH 137/178] Use HTTPStatus constants in place of literals in tests. (#13297) --- changelog.d/13297.misc | 1 + tests/federation/test_complexity.py | 5 +- tests/federation/test_federation_server.py | 11 +- tests/federation/transport/test_knocking.py | 5 +- tests/handlers/test_password_providers.py | 41 +-- tests/rest/admin/test_user.py | 16 +- tests/rest/client/test_account.py | 71 ++-- tests/rest/client/test_login.py | 55 ++-- tests/rest/client/test_rooms.py | 341 +++++++++++--------- 9 files changed, 308 insertions(+), 238 deletions(-) create mode 100644 changelog.d/13297.misc diff --git a/changelog.d/13297.misc b/changelog.d/13297.misc new file mode 100644 index 0000000000..545a62369f --- /dev/null +++ b/changelog.d/13297.misc @@ -0,0 +1 @@ +Use `HTTPStatus` constants in place of literals in tests. \ No newline at end of file diff --git a/tests/federation/test_complexity.py b/tests/federation/test_complexity.py index 9f1115dd23..c6dd99316a 100644 --- a/tests/federation/test_complexity.py +++ b/tests/federation/test_complexity.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from http import HTTPStatus from unittest.mock import Mock from synapse.api.errors import Codes, SynapseError @@ -50,7 +51,7 @@ class RoomComplexityTests(unittest.FederatingHomeserverTestCase): channel = self.make_signed_federation_request( "GET", "/_matrix/federation/unstable/rooms/%s/complexity" % (room_1,) ) - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) complexity = channel.json_body["v1"] self.assertTrue(complexity > 0, complexity) @@ -62,7 +63,7 @@ class RoomComplexityTests(unittest.FederatingHomeserverTestCase): channel = self.make_signed_federation_request( "GET", "/_matrix/federation/unstable/rooms/%s/complexity" % (room_1,) ) - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) complexity = channel.json_body["v1"] self.assertEqual(complexity, 1.23) diff --git a/tests/federation/test_federation_server.py b/tests/federation/test_federation_server.py index fd15e680ed..8ea13ceb93 100644 --- a/tests/federation/test_federation_server.py +++ b/tests/federation/test_federation_server.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import logging +from http import HTTPStatus from parameterized import parameterized @@ -58,7 +59,7 @@ class FederationServerTests(unittest.FederatingHomeserverTestCase): "/_matrix/federation/v1/get_missing_events/%s" % (room_1,), query_content, ) - self.assertEqual(400, channel.code, channel.result) + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code, channel.result) self.assertEqual(channel.json_body["errcode"], "M_NOT_JSON") @@ -119,7 +120,7 @@ class StateQueryTests(unittest.FederatingHomeserverTestCase): channel = self.make_signed_federation_request( "GET", "/_matrix/federation/v1/state/%s?event_id=xyz" % (room_1,) ) - self.assertEqual(403, channel.code, channel.result) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, channel.result) self.assertEqual(channel.json_body["errcode"], "M_FORBIDDEN") @@ -153,7 +154,7 @@ class SendJoinFederationTests(unittest.FederatingHomeserverTestCase): f"/_matrix/federation/v1/make_join/{self._room_id}/{user_id}" f"?ver={DEFAULT_ROOM_VERSION}", ) - self.assertEqual(channel.code, 200, channel.json_body) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) return channel.json_body def test_send_join(self): @@ -171,7 +172,7 @@ class SendJoinFederationTests(unittest.FederatingHomeserverTestCase): f"/_matrix/federation/v2/send_join/{self._room_id}/x", content=join_event_dict, ) - self.assertEqual(channel.code, 200, channel.json_body) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) # we should get complete room state back returned_state = [ @@ -226,7 +227,7 @@ class SendJoinFederationTests(unittest.FederatingHomeserverTestCase): f"/_matrix/federation/v2/send_join/{self._room_id}/x?org.matrix.msc3706.partial_state=true", content=join_event_dict, ) - self.assertEqual(channel.code, 200, channel.json_body) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) # expect a reduced room state returned_state = [ diff --git a/tests/federation/transport/test_knocking.py b/tests/federation/transport/test_knocking.py index d21c11b716..0d048207b7 100644 --- a/tests/federation/transport/test_knocking.py +++ b/tests/federation/transport/test_knocking.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
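The substitutions in this patch are purely cosmetic because `HTTPStatus` members are `IntEnum` values: they compare equal to the bare integers they replace, so no assertion changes behaviour, only readability. A quick self-contained demonstration:

```python
from http import HTTPStatus

# Each constant is an int subclass, so existing comparisons keep working.
assert HTTPStatus.OK == 200
assert HTTPStatus.CREATED == 201
assert HTTPStatus.BAD_REQUEST == 400
assert HTTPStatus.FORBIDDEN == 403
assert isinstance(HTTPStatus.NOT_FOUND, int)

# The enum also carries the reason phrase, handy when writing test messages.
assert HTTPStatus.NOT_FOUND.value == 404
assert HTTPStatus.NOT_FOUND.phrase == "Not Found"
```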
from collections import OrderedDict +from http import HTTPStatus from typing import Dict, List from synapse.api.constants import EventTypes, JoinRules, Membership @@ -255,7 +256,7 @@ class FederationKnockingTestCase( RoomVersions.V7.identifier, ), ) - self.assertEqual(200, channel.code, channel.result) + self.assertEqual(HTTPStatus.OK, channel.code, channel.result) # Note: We don't expect the knock membership event to be sent over federation as # part of the stripped room state, as the knocking homeserver already has that @@ -293,7 +294,7 @@ class FederationKnockingTestCase( % (room_id, signed_knock_event.event_id), signed_knock_event_json, ) - self.assertEqual(200, channel.code, channel.result) + self.assertEqual(HTTPStatus.OK, channel.code, channel.result) # Check that we got the stripped room state in return room_state_events = channel.json_body["knock_state_events"] diff --git a/tests/handlers/test_password_providers.py b/tests/handlers/test_password_providers.py index 82b3bb3b73..4c62449c89 100644 --- a/tests/handlers/test_password_providers.py +++ b/tests/handlers/test_password_providers.py @@ -14,6 +14,7 @@ """Tests for the password_auth_provider interface""" +from http import HTTPStatus from typing import Any, Type, Union from unittest.mock import Mock @@ -188,14 +189,14 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): # check_password must return an awaitable mock_password_provider.check_password.return_value = make_awaitable(True) channel = self._send_password_login("u", "p") - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self.assertEqual("@u:test", channel.json_body["user_id"]) mock_password_provider.check_password.assert_called_once_with("@u:test", "p") mock_password_provider.reset_mock() # login with mxid should work too channel = self._send_password_login("@u:bz", "p") - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self.assertEqual("@u:bz", channel.json_body["user_id"]) mock_password_provider.check_password.assert_called_once_with("@u:bz", "p") mock_password_provider.reset_mock() @@ -204,7 +205,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): # in these cases, but at least we can guard against the API changing # unexpectedly channel = self._send_password_login(" USER🙂NAME ", " pASS\U0001F622word ") - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self.assertEqual("@ USER🙂NAME :test", channel.json_body["user_id"]) mock_password_provider.check_password.assert_called_once_with( "@ USER🙂NAME :test", " pASS😢word " @@ -258,10 +259,10 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): # check_password must return an awaitable mock_password_provider.check_password.return_value = make_awaitable(False) channel = self._send_password_login("u", "p") - self.assertEqual(channel.code, 403, channel.result) + self.assertEqual(channel.code, HTTPStatus.FORBIDDEN, channel.result) channel = self._send_password_login("localuser", "localpass") - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self.assertEqual("@localuser:test", channel.json_body["user_id"]) @override_config(legacy_providers_config(LegacyPasswordOnlyAuthProvider)) @@ -382,7 +383,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): # login shouldn't work and should be rejected with a 400 ("unknown 
login type") channel = self._send_password_login("u", "p") - self.assertEqual(channel.code, 400, channel.result) + self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) mock_password_provider.check_password.assert_not_called() @override_config(legacy_providers_config(LegacyCustomAuthProvider)) @@ -406,14 +407,14 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): # login with missing param should be rejected channel = self._send_login("test.login_type", "u") - self.assertEqual(channel.code, 400, channel.result) + self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) mock_password_provider.check_auth.assert_not_called() mock_password_provider.check_auth.return_value = make_awaitable( ("@user:bz", None) ) channel = self._send_login("test.login_type", "u", test_field="y") - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self.assertEqual("@user:bz", channel.json_body["user_id"]) mock_password_provider.check_auth.assert_called_once_with( "u", "test.login_type", {"test_field": "y"} @@ -427,7 +428,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): ("@ MALFORMED! :bz", None) ) channel = self._send_login("test.login_type", " USER🙂NAME ", test_field=" abc ") - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self.assertEqual("@ MALFORMED! :bz", channel.json_body["user_id"]) mock_password_provider.check_auth.assert_called_once_with( " USER🙂NAME ", "test.login_type", {"test_field": " abc "} @@ -510,7 +511,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): ("@user:bz", callback) ) channel = self._send_login("test.login_type", "u", test_field="y") - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self.assertEqual("@user:bz", channel.json_body["user_id"]) mock_password_provider.check_auth.assert_called_once_with( "u", "test.login_type", {"test_field": "y"} @@ -549,7 +550,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): # login shouldn't work and should be rejected with a 400 ("unknown login type") channel = self._send_password_login("localuser", "localpass") - self.assertEqual(channel.code, 400, channel.result) + self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) mock_password_provider.check_auth.assert_not_called() @override_config( @@ -584,7 +585,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): # login shouldn't work and should be rejected with a 400 ("unknown login type") channel = self._send_password_login("localuser", "localpass") - self.assertEqual(channel.code, 400, channel.result) + self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) mock_password_provider.check_auth.assert_not_called() @override_config( @@ -615,7 +616,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): # login shouldn't work and should be rejected with a 400 ("unknown login type") channel = self._send_password_login("localuser", "localpass") - self.assertEqual(channel.code, 400, channel.result) + self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) mock_password_provider.check_auth.assert_not_called() mock_password_provider.check_password.assert_not_called() @@ -646,13 +647,13 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): ("@localuser:test", None) ) channel = self._send_login("test.login_type", "localuser", test_field="") - 
self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) tok1 = channel.json_body["access_token"] channel = self._send_login( "test.login_type", "localuser", test_field="", device_id="dev2" ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) # make the initial request which returns a 401 channel = self._delete_device(tok1, "dev2") @@ -721,7 +722,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): # password login shouldn't work and should be rejected with a 400 # ("unknown login type") channel = self._send_password_login("localuser", "localpass") - self.assertEqual(channel.code, 400, channel.result) + self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) def test_on_logged_out(self): """Tests that the on_logged_out callback is called when the user logs out.""" @@ -884,7 +885,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): }, access_token=tok, ) - self.assertEqual(channel.code, 403, channel.result) + self.assertEqual(channel.code, HTTPStatus.FORBIDDEN, channel.result) self.assertEqual( channel.json_body["errcode"], Codes.THREEPID_DENIED, @@ -906,7 +907,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): }, access_token=tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self.assertIn("sid", channel.json_body) m.assert_called_once_with("email", "bar@test.com", registration) @@ -949,12 +950,12 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase): "register", {"auth": {"session": session, "type": LoginType.DUMMY}}, ) - self.assertEqual(channel.code, 200, channel.json_body) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) return channel.json_body def _get_login_flows(self) -> JsonDict: channel = self.make_request("GET", "/_matrix/client/r0/login") - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) return channel.json_body["flows"] def _send_password_login(self, user: str, password: str) -> FakeChannel: diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index 97693cd1e2..12db68d564 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -1379,7 +1379,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): content=body, ) - self.assertEqual(201, channel.code, msg=channel.json_body) + self.assertEqual(HTTPStatus.CREATED, channel.code, msg=channel.json_body) self.assertEqual("@bob:test", channel.json_body["name"]) self.assertEqual("Bob's name", channel.json_body["displayname"]) self.assertEqual("email", channel.json_body["threepids"][0]["medium"]) @@ -1434,7 +1434,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): content=body, ) - self.assertEqual(201, channel.code, msg=channel.json_body) + self.assertEqual(HTTPStatus.CREATED, channel.code, msg=channel.json_body) self.assertEqual("@bob:test", channel.json_body["name"]) self.assertEqual("Bob's name", channel.json_body["displayname"]) self.assertEqual("email", channel.json_body["threepids"][0]["medium"]) @@ -1512,7 +1512,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): content={"password": "abc123", "admin": False}, ) - self.assertEqual(201, channel.code, msg=channel.json_body) + self.assertEqual(HTTPStatus.CREATED, channel.code, msg=channel.json_body) self.assertEqual("@bob:test", channel.json_body["name"]) 
self.assertFalse(channel.json_body["admin"]) @@ -1550,7 +1550,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): ) # Admin user is not blocked by mau anymore - self.assertEqual(201, channel.code, msg=channel.json_body) + self.assertEqual(HTTPStatus.CREATED, channel.code, msg=channel.json_body) self.assertEqual("@bob:test", channel.json_body["name"]) self.assertFalse(channel.json_body["admin"]) @@ -1585,7 +1585,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): content=body, ) - self.assertEqual(201, channel.code, msg=channel.json_body) + self.assertEqual(HTTPStatus.CREATED, channel.code, msg=channel.json_body) self.assertEqual("@bob:test", channel.json_body["name"]) self.assertEqual("email", channel.json_body["threepids"][0]["medium"]) self.assertEqual("bob@bob.bob", channel.json_body["threepids"][0]["address"]) @@ -1626,7 +1626,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): content=body, ) - self.assertEqual(201, channel.code, msg=channel.json_body) + self.assertEqual(HTTPStatus.CREATED, channel.code, msg=channel.json_body) self.assertEqual("@bob:test", channel.json_body["name"]) self.assertEqual("email", channel.json_body["threepids"][0]["medium"]) self.assertEqual("bob@bob.bob", channel.json_body["threepids"][0]["address"]) @@ -1666,7 +1666,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): content=body, ) - self.assertEqual(201, channel.code, msg=channel.json_body) + self.assertEqual(HTTPStatus.CREATED, channel.code, msg=channel.json_body) self.assertEqual("@bob:test", channel.json_body["name"]) self.assertEqual("msisdn", channel.json_body["threepids"][0]["medium"]) self.assertEqual("1234567890", channel.json_body["threepids"][0]["address"]) @@ -2407,7 +2407,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): content={"password": "abc123"}, ) - self.assertEqual(201, channel.code, msg=channel.json_body) + self.assertEqual(HTTPStatus.CREATED, channel.code, msg=channel.json_body) self.assertEqual("@bob:test", channel.json_body["name"]) self.assertEqual("bob", channel.json_body["displayname"]) diff --git a/tests/rest/client/test_account.py b/tests/rest/client/test_account.py index 1f9b65351e..6d6a26b8f4 100644 --- a/tests/rest/client/test_account.py +++ b/tests/rest/client/test_account.py @@ -15,6 +15,7 @@ import json import os import re from email.parser import Parser +from http import HTTPStatus from typing import Any, Dict, List, Optional, Union from unittest.mock import Mock @@ -98,7 +99,7 @@ class PasswordResetTestCase(unittest.HomeserverTestCase): channel = self.make_request( "POST", "/_matrix/client/r0/login", json.dumps(body).encode("utf8") ) - self.assertEqual(channel.code, 403, channel.result) + self.assertEqual(channel.code, HTTPStatus.FORBIDDEN, channel.result) def test_basic_password_reset(self) -> None: """Test basic password reset flow""" @@ -347,7 +348,7 @@ class PasswordResetTestCase(unittest.HomeserverTestCase): shorthand=False, ) - self.assertEqual(200, channel.code, channel.result) + self.assertEqual(HTTPStatus.OK, channel.code, channel.result) # Now POST to the same endpoint, mimicking the same behaviour as clicking the # password reset confirm button @@ -362,7 +363,7 @@ class PasswordResetTestCase(unittest.HomeserverTestCase): shorthand=False, content_is_form=True, ) - self.assertEqual(200, channel.code, channel.result) + self.assertEqual(HTTPStatus.OK, channel.code, channel.result) def _get_link_from_email(self) -> str: assert self.email_attempts, "No emails have been sent" @@ -390,7 +391,7 @@ class 
PasswordResetTestCase(unittest.HomeserverTestCase): new_password: str, session_id: str, client_secret: str, - expected_code: int = 200, + expected_code: int = HTTPStatus.OK, ) -> None: channel = self.make_request( "POST", @@ -715,7 +716,9 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): }, access_token=self.user_id_tok, ) - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) # Get user @@ -725,7 +728,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): access_token=self.user_id_tok, ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertFalse(channel.json_body["threepids"]) def test_delete_email(self) -> None: @@ -747,7 +750,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): {"medium": "email", "address": self.email}, access_token=self.user_id_tok, ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) # Get user channel = self.make_request( @@ -756,7 +759,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): access_token=self.user_id_tok, ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertFalse(channel.json_body["threepids"]) def test_delete_email_if_disabled(self) -> None: @@ -781,7 +784,9 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): access_token=self.user_id_tok, ) - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) # Get user @@ -791,7 +796,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): access_token=self.user_id_tok, ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertEqual("email", channel.json_body["threepids"][0]["medium"]) self.assertEqual(self.email, channel.json_body["threepids"][0]["address"]) @@ -817,7 +822,9 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): }, access_token=self.user_id_tok, ) - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) self.assertEqual(Codes.THREEPID_AUTH_FAILED, channel.json_body["errcode"]) # Get user @@ -827,7 +834,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): access_token=self.user_id_tok, ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertFalse(channel.json_body["threepids"]) def test_no_valid_token(self) -> None: @@ -852,7 +859,9 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): }, access_token=self.user_id_tok, ) - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) self.assertEqual(Codes.THREEPID_AUTH_FAILED, channel.json_body["errcode"]) # Get user @@ -862,7 +871,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): access_token=self.user_id_tok, ) - 
self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertFalse(channel.json_body["threepids"]) @override_config({"next_link_domain_whitelist": None}) @@ -872,7 +881,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): "something@example.com", "some_secret", next_link="https://example.com/a/good/site", - expect_code=200, + expect_code=HTTPStatus.OK, ) @override_config({"next_link_domain_whitelist": None}) @@ -884,7 +893,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): "something@example.com", "some_secret", next_link="some-protocol://abcdefghijklmopqrstuvwxyz", - expect_code=200, + expect_code=HTTPStatus.OK, ) @override_config({"next_link_domain_whitelist": None}) @@ -895,7 +904,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): "something@example.com", "some_secret", next_link="file:///host/path", - expect_code=400, + expect_code=HTTPStatus.BAD_REQUEST, ) @override_config({"next_link_domain_whitelist": ["example.com", "example.org"]}) @@ -907,28 +916,28 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): "something@example.com", "some_secret", next_link=None, - expect_code=200, + expect_code=HTTPStatus.OK, ) self._request_token( "something@example.com", "some_secret", next_link="https://example.com/some/good/page", - expect_code=200, + expect_code=HTTPStatus.OK, ) self._request_token( "something@example.com", "some_secret", next_link="https://example.org/some/also/good/page", - expect_code=200, + expect_code=HTTPStatus.OK, ) self._request_token( "something@example.com", "some_secret", next_link="https://bad.example.org/some/bad/page", - expect_code=400, + expect_code=HTTPStatus.BAD_REQUEST, ) @override_config({"next_link_domain_whitelist": []}) @@ -940,7 +949,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): "something@example.com", "some_secret", next_link="https://example.com/a/page", - expect_code=400, + expect_code=HTTPStatus.BAD_REQUEST, ) def _request_token( @@ -948,7 +957,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): email: str, client_secret: str, next_link: Optional[str] = None, - expect_code: int = 200, + expect_code: int = HTTPStatus.OK, ) -> Optional[str]: """Request a validation token to add an email address to a user's account @@ -993,7 +1002,9 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): b"account/3pid/email/requestToken", {"client_secret": client_secret, "email": email, "send_attempt": 1}, ) - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) self.assertEqual(expected_errcode, channel.json_body["errcode"]) self.assertEqual(expected_error, channel.json_body["error"]) @@ -1002,7 +1013,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): path = link.replace("https://example.com", "") channel = self.make_request("GET", path, shorthand=False) - self.assertEqual(200, channel.code, channel.result) + self.assertEqual(HTTPStatus.OK, channel.code, channel.result) def _get_link_from_email(self) -> str: assert self.email_attempts, "No emails have been sent" @@ -1052,7 +1063,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): access_token=self.user_id_tok, ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) # Get user channel = self.make_request( @@ 
-1061,7 +1072,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): access_token=self.user_id_tok, ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertEqual("email", channel.json_body["threepids"][0]["medium"]) threepids = {threepid["address"] for threepid in channel.json_body["threepids"]} @@ -1092,7 +1103,7 @@ class AccountStatusTestCase(unittest.HomeserverTestCase): """Tests that not providing any MXID raises an error.""" self._test_status( users=None, - expected_status_code=400, + expected_status_code=HTTPStatus.BAD_REQUEST, expected_errcode=Codes.MISSING_PARAM, ) @@ -1100,7 +1111,7 @@ class AccountStatusTestCase(unittest.HomeserverTestCase): """Tests that providing an invalid MXID raises an error.""" self._test_status( users=["bad:test"], - expected_status_code=400, + expected_status_code=HTTPStatus.BAD_REQUEST, expected_errcode=Codes.INVALID_PARAM, ) @@ -1286,7 +1297,7 @@ class AccountStatusTestCase(unittest.HomeserverTestCase): def _test_status( self, users: Optional[List[str]], - expected_status_code: int = 200, + expected_status_code: int = HTTPStatus.OK, expected_statuses: Optional[Dict[str, Dict[str, bool]]] = None, expected_failures: Optional[List[str]] = None, expected_errcode: Optional[str] = None, diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py index f6efa5fe37..e7f5517e34 100644 --- a/tests/rest/client/test_login.py +++ b/tests/rest/client/test_login.py @@ -14,6 +14,7 @@ import json import time import urllib.parse +from http import HTTPStatus from typing import Any, Dict, List, Optional from unittest.mock import Mock from urllib.parse import urlencode @@ -261,20 +262,20 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): } channel = self.make_request(b"POST", LOGIN_URL, params) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) access_token = channel.json_body["access_token"] device_id = channel.json_body["device_id"] # we should now be able to make requests with the access token channel = self.make_request(b"GET", TEST_URL, access_token=access_token) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) # time passes self.reactor.advance(24 * 3600) # ... 
and we should be soft-logouted channel = self.make_request(b"GET", TEST_URL, access_token=access_token) - self.assertEqual(channel.code, 401, channel.result) + self.assertEqual(channel.code, HTTPStatus.UNAUTHORIZED, channel.result) self.assertEqual(channel.json_body["errcode"], "M_UNKNOWN_TOKEN") self.assertEqual(channel.json_body["soft_logout"], True) @@ -288,7 +289,7 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): # more requests with the expired token should still return a soft-logout self.reactor.advance(3600) channel = self.make_request(b"GET", TEST_URL, access_token=access_token) - self.assertEqual(channel.code, 401, channel.result) + self.assertEqual(channel.code, HTTPStatus.UNAUTHORIZED, channel.result) self.assertEqual(channel.json_body["errcode"], "M_UNKNOWN_TOKEN") self.assertEqual(channel.json_body["soft_logout"], True) @@ -296,7 +297,7 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): self._delete_device(access_token_2, "kermit", "monkey", device_id) channel = self.make_request(b"GET", TEST_URL, access_token=access_token) - self.assertEqual(channel.code, 401, channel.result) + self.assertEqual(channel.code, HTTPStatus.UNAUTHORIZED, channel.result) self.assertEqual(channel.json_body["errcode"], "M_UNKNOWN_TOKEN") self.assertEqual(channel.json_body["soft_logout"], False) @@ -307,7 +308,7 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): channel = self.make_request( b"DELETE", "devices/" + device_id, access_token=access_token ) - self.assertEqual(channel.code, 401, channel.result) + self.assertEqual(channel.code, HTTPStatus.UNAUTHORIZED, channel.result) # check it's a UI-Auth fail self.assertEqual( set(channel.json_body.keys()), @@ -330,7 +331,7 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): access_token=access_token, content={"auth": auth}, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) @override_config({"session_lifetime": "24h"}) def test_session_can_hard_logout_after_being_soft_logged_out(self) -> None: @@ -341,14 +342,14 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): # we should now be able to make requests with the access token channel = self.make_request(b"GET", TEST_URL, access_token=access_token) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) # time passes self.reactor.advance(24 * 3600) # ... and we should be soft-logouted channel = self.make_request(b"GET", TEST_URL, access_token=access_token) - self.assertEqual(channel.code, 401, channel.result) + self.assertEqual(channel.code, HTTPStatus.UNAUTHORIZED, channel.result) self.assertEqual(channel.json_body["errcode"], "M_UNKNOWN_TOKEN") self.assertEqual(channel.json_body["soft_logout"], True) @@ -367,14 +368,14 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): # we should now be able to make requests with the access token channel = self.make_request(b"GET", TEST_URL, access_token=access_token) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) # time passes self.reactor.advance(24 * 3600) # ... 
and we should be soft-logouted channel = self.make_request(b"GET", TEST_URL, access_token=access_token) - self.assertEqual(channel.code, 401, channel.result) + self.assertEqual(channel.code, HTTPStatus.UNAUTHORIZED, channel.result) self.assertEqual(channel.json_body["errcode"], "M_UNKNOWN_TOKEN") self.assertEqual(channel.json_body["soft_logout"], True) @@ -466,7 +467,7 @@ class MultiSSOTestCase(unittest.HomeserverTestCase): def test_get_login_flows(self) -> None: """GET /login should return password and SSO flows""" channel = self.make_request("GET", "/_matrix/client/r0/login") - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) expected_flow_types = [ "m.login.cas", @@ -494,14 +495,14 @@ class MultiSSOTestCase(unittest.HomeserverTestCase): """/login/sso/redirect should redirect to an identity picker""" # first hit the redirect url, which should redirect to our idp picker channel = self._make_sso_redirect_request(None) - self.assertEqual(channel.code, 302, channel.result) + self.assertEqual(channel.code, HTTPStatus.FOUND, channel.result) location_headers = channel.headers.getRawHeaders("Location") assert location_headers uri = location_headers[0] # hitting that picker should give us some HTML channel = self.make_request("GET", uri) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) # parse the form to check it has fields assumed elsewhere in this class html = channel.result["body"].decode("utf-8") @@ -530,7 +531,7 @@ class MultiSSOTestCase(unittest.HomeserverTestCase): + "&idp=cas", shorthand=False, ) - self.assertEqual(channel.code, 302, channel.result) + self.assertEqual(channel.code, HTTPStatus.FOUND, channel.result) location_headers = channel.headers.getRawHeaders("Location") assert location_headers cas_uri = location_headers[0] @@ -555,7 +556,7 @@ class MultiSSOTestCase(unittest.HomeserverTestCase): + urllib.parse.quote_plus(TEST_CLIENT_REDIRECT_URL) + "&idp=saml", ) - self.assertEqual(channel.code, 302, channel.result) + self.assertEqual(channel.code, HTTPStatus.FOUND, channel.result) location_headers = channel.headers.getRawHeaders("Location") assert location_headers saml_uri = location_headers[0] @@ -579,7 +580,7 @@ class MultiSSOTestCase(unittest.HomeserverTestCase): + urllib.parse.quote_plus(TEST_CLIENT_REDIRECT_URL) + "&idp=oidc", ) - self.assertEqual(channel.code, 302, channel.result) + self.assertEqual(channel.code, HTTPStatus.FOUND, channel.result) location_headers = channel.headers.getRawHeaders("Location") assert location_headers oidc_uri = location_headers[0] @@ -606,7 +607,7 @@ class MultiSSOTestCase(unittest.HomeserverTestCase): channel = self.helper.complete_oidc_auth(oidc_uri, cookies, {"sub": "user1"}) # that should serve a confirmation page - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) content_type_headers = channel.headers.getRawHeaders("Content-Type") assert content_type_headers self.assertTrue(content_type_headers[-1].startswith("text/html")) @@ -634,7 +635,7 @@ class MultiSSOTestCase(unittest.HomeserverTestCase): "/login", content={"type": "m.login.token", "token": login_token}, ) - self.assertEqual(chan.code, 200, chan.result) + self.assertEqual(chan.code, HTTPStatus.OK, chan.result) self.assertEqual(chan.json_body["user_id"], "@user1:test") def test_multi_sso_redirect_to_unknown(self) -> None: @@ -643,18 +644,18 @@ class 
MultiSSOTestCase(unittest.HomeserverTestCase): "GET", "/_synapse/client/pick_idp?redirectUrl=http://x&idp=xyz", ) - self.assertEqual(channel.code, 400, channel.result) + self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) def test_client_idp_redirect_to_unknown(self) -> None: """If the client tries to pick an unknown IdP, return a 404""" channel = self._make_sso_redirect_request("xxx") - self.assertEqual(channel.code, 404, channel.result) + self.assertEqual(channel.code, HTTPStatus.NOT_FOUND, channel.result) self.assertEqual(channel.json_body["errcode"], "M_NOT_FOUND") def test_client_idp_redirect_to_oidc(self) -> None: """If the client pick a known IdP, redirect to it""" channel = self._make_sso_redirect_request("oidc") - self.assertEqual(channel.code, 302, channel.result) + self.assertEqual(channel.code, HTTPStatus.FOUND, channel.result) location_headers = channel.headers.getRawHeaders("Location") assert location_headers oidc_uri = location_headers[0] @@ -765,7 +766,7 @@ class CASTestCase(unittest.HomeserverTestCase): channel = self.make_request("GET", cas_ticket_url) # Test that the response is HTML. - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) content_type_header_value = "" for header in channel.result.get("headers", []): if header[0] == b"Content-Type": @@ -1246,7 +1247,7 @@ class UsernamePickerTestCase(HomeserverTestCase): ) # that should redirect to the username picker - self.assertEqual(channel.code, 302, channel.result) + self.assertEqual(channel.code, HTTPStatus.FOUND, channel.result) location_headers = channel.headers.getRawHeaders("Location") assert location_headers picker_url = location_headers[0] @@ -1290,7 +1291,7 @@ class UsernamePickerTestCase(HomeserverTestCase): ("Content-Length", str(len(content))), ], ) - self.assertEqual(chan.code, 302, chan.result) + self.assertEqual(chan.code, HTTPStatus.FOUND, chan.result) location_headers = chan.headers.getRawHeaders("Location") assert location_headers @@ -1300,7 +1301,7 @@ class UsernamePickerTestCase(HomeserverTestCase): path=location_headers[0], custom_headers=[("Cookie", "username_mapping_session=" + session_id)], ) - self.assertEqual(chan.code, 302, chan.result) + self.assertEqual(chan.code, HTTPStatus.FOUND, chan.result) location_headers = chan.headers.getRawHeaders("Location") assert location_headers @@ -1325,5 +1326,5 @@ class UsernamePickerTestCase(HomeserverTestCase): "/login", content={"type": "m.login.token", "token": login_token}, ) - self.assertEqual(chan.code, 200, chan.result) + self.assertEqual(chan.code, HTTPStatus.OK, chan.result) self.assertEqual(chan.json_body["user_id"], "@bobby:test") diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 06221b806a..4c6b3decd8 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -18,6 +18,7 @@ """Tests REST events for /rooms paths.""" import json +from http import HTTPStatus from typing import Any, Dict, Iterable, List, Optional, Tuple, Union from unittest.mock import Mock, call from urllib import parse as urlparse @@ -104,7 +105,7 @@ class RoomPermissionsTestCase(RoomBase): channel = self.make_request( "PUT", self.created_rmid_msg_path, b'{"msgtype":"m.text","body":"test msg"}' ) - self.assertEqual(200, channel.code, channel.result) + self.assertEqual(HTTPStatus.OK, channel.code, channel.result) # set topic for public room channel = self.make_request( @@ -112,7 +113,7 @@ class RoomPermissionsTestCase(RoomBase): 
("rooms/%s/state/m.room.topic" % self.created_public_rmid).encode("ascii"), b'{"topic":"Public Room Topic"}', ) - self.assertEqual(200, channel.code, channel.result) + self.assertEqual(HTTPStatus.OK, channel.code, channel.result) # auth as user_id now self.helper.auth_user_id = self.user_id @@ -134,28 +135,28 @@ class RoomPermissionsTestCase(RoomBase): "/rooms/%s/send/m.room.message/mid2" % (self.uncreated_rmid,), msg_content, ) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) # send message in created room not joined (no state), expect 403 channel = self.make_request("PUT", send_msg_path(), msg_content) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) # send message in created room and invited, expect 403 self.helper.invite( room=self.created_rmid, src=self.rmcreator_id, targ=self.user_id ) channel = self.make_request("PUT", send_msg_path(), msg_content) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) # send message in created room and joined, expect 200 self.helper.join(room=self.created_rmid, user=self.user_id) channel = self.make_request("PUT", send_msg_path(), msg_content) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) # send message in created room and left, expect 403 self.helper.leave(room=self.created_rmid, user=self.user_id) channel = self.make_request("PUT", send_msg_path(), msg_content) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) def test_topic_perms(self) -> None: topic_content = b'{"topic":"My Topic Name"}' @@ -165,28 +166,28 @@ class RoomPermissionsTestCase(RoomBase): channel = self.make_request( "PUT", "/rooms/%s/state/m.room.topic" % self.uncreated_rmid, topic_content ) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) channel = self.make_request( "GET", "/rooms/%s/state/m.room.topic" % self.uncreated_rmid ) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) # set/get topic in created PRIVATE room not joined, expect 403 channel = self.make_request("PUT", topic_path, topic_content) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) channel = self.make_request("GET", topic_path) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) # set topic in created PRIVATE room and invited, expect 403 self.helper.invite( room=self.created_rmid, src=self.rmcreator_id, targ=self.user_id ) channel = self.make_request("PUT", topic_path, topic_content) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) # get topic in created PRIVATE room and invited, expect 403 channel = self.make_request("GET", topic_path) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, 
channel.code, msg=channel.result["body"]) # set/get topic in created PRIVATE room and joined, expect 200 self.helper.join(room=self.created_rmid, user=self.user_id) @@ -194,25 +195,25 @@ class RoomPermissionsTestCase(RoomBase): # Only room ops can set topic by default self.helper.auth_user_id = self.rmcreator_id channel = self.make_request("PUT", topic_path, topic_content) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.helper.auth_user_id = self.user_id channel = self.make_request("GET", topic_path) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assert_dict(json.loads(topic_content.decode("utf8")), channel.json_body) # set/get topic in created PRIVATE room and left, expect 403 self.helper.leave(room=self.created_rmid, user=self.user_id) channel = self.make_request("PUT", topic_path, topic_content) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) channel = self.make_request("GET", topic_path) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) # get topic in PUBLIC room, not joined, expect 403 channel = self.make_request( "GET", "/rooms/%s/state/m.room.topic" % self.created_public_rmid ) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) # set topic in PUBLIC room, not joined, expect 403 channel = self.make_request( @@ -220,7 +221,7 @@ class RoomPermissionsTestCase(RoomBase): "/rooms/%s/state/m.room.topic" % self.created_public_rmid, topic_content, ) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) def _test_get_membership( self, room: str, members: Iterable = frozenset(), expect_code: int = 200 @@ -309,14 +310,14 @@ class RoomPermissionsTestCase(RoomBase): src=self.user_id, targ=self.rmcreator_id, membership=Membership.JOIN, - expect_code=403, + expect_code=HTTPStatus.FORBIDDEN, ) self.helper.change_membership( room=room, src=self.user_id, targ=self.rmcreator_id, membership=Membership.LEAVE, - expect_code=403, + expect_code=HTTPStatus.FORBIDDEN, ) def test_joined_permissions(self) -> None: @@ -342,7 +343,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.user_id, targ=other, membership=Membership.JOIN, - expect_code=403, + expect_code=HTTPStatus.FORBIDDEN, ) # set left of other, expect 403 @@ -351,7 +352,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.user_id, targ=other, membership=Membership.LEAVE, - expect_code=403, + expect_code=HTTPStatus.FORBIDDEN, ) # set left of self, expect 200 @@ -371,7 +372,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.user_id, targ=usr, membership=Membership.INVITE, - expect_code=403, + expect_code=HTTPStatus.FORBIDDEN, ) self.helper.change_membership( @@ -379,7 +380,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.user_id, targ=usr, membership=Membership.JOIN, - expect_code=403, + expect_code=HTTPStatus.FORBIDDEN, ) # It is always valid to LEAVE if you've already left (currently.) 
@@ -388,7 +389,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.user_id, targ=self.rmcreator_id, membership=Membership.LEAVE, - expect_code=403, + expect_code=HTTPStatus.FORBIDDEN, ) # tests the "from banned" line from the table in https://spec.matrix.org/unstable/client-server-api/#mroommember @@ -405,7 +406,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.user_id, targ=other, membership=Membership.BAN, - expect_code=403, # expect failure + expect_code=HTTPStatus.FORBIDDEN, # expect failure expect_errcode=Codes.FORBIDDEN, ) @@ -415,7 +416,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.rmcreator_id, targ=other, membership=Membership.BAN, - expect_code=200, + expect_code=HTTPStatus.OK, ) # from ban to invite: Must never happen. @@ -424,7 +425,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.rmcreator_id, targ=other, membership=Membership.INVITE, - expect_code=403, # expect failure + expect_code=HTTPStatus.FORBIDDEN, # expect failure expect_errcode=Codes.BAD_STATE, ) @@ -434,7 +435,7 @@ class RoomPermissionsTestCase(RoomBase): src=other, targ=other, membership=Membership.JOIN, - expect_code=403, # expect failure + expect_code=HTTPStatus.FORBIDDEN, # expect failure expect_errcode=Codes.BAD_STATE, ) @@ -444,7 +445,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.rmcreator_id, targ=other, membership=Membership.BAN, - expect_code=200, + expect_code=HTTPStatus.OK, ) # from ban to knock: Must never happen. @@ -453,7 +454,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.rmcreator_id, targ=other, membership=Membership.KNOCK, - expect_code=403, # expect failure + expect_code=HTTPStatus.FORBIDDEN, # expect failure expect_errcode=Codes.BAD_STATE, ) @@ -463,7 +464,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.user_id, targ=other, membership=Membership.LEAVE, - expect_code=403, # expect failure + expect_code=HTTPStatus.FORBIDDEN, # expect failure expect_errcode=Codes.FORBIDDEN, ) @@ -473,7 +474,7 @@ class RoomPermissionsTestCase(RoomBase): src=self.rmcreator_id, targ=other, membership=Membership.LEAVE, - expect_code=200, + expect_code=HTTPStatus.OK, ) @@ -493,7 +494,7 @@ class RoomStateTestCase(RoomBase): "/rooms/%s/state" % room_id, ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertCountEqual( [state_event["type"] for state_event in channel.json_body], { @@ -516,7 +517,7 @@ class RoomStateTestCase(RoomBase): "/rooms/%s/state/m.room.member/%s" % (room_id, self.user_id), ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertEqual(channel.json_body, {"membership": "join"}) @@ -530,16 +531,16 @@ class RoomsMemberListTestCase(RoomBase): def test_get_member_list(self) -> None: room_id = self.helper.create_room_as(self.user_id) channel = self.make_request("GET", "/rooms/%s/members" % room_id) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) def test_get_member_list_no_room(self) -> None: channel = self.make_request("GET", "/rooms/roomdoesnotexist/members") - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) def test_get_member_list_no_permission(self) -> None: room_id = self.helper.create_room_as("@some_other_guy:red") channel = self.make_request("GET", "/rooms/%s/members" % 
room_id) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) def test_get_member_list_no_permission_with_at_token(self) -> None: """ @@ -550,7 +551,7 @@ class RoomsMemberListTestCase(RoomBase): # first sync to get an at token channel = self.make_request("GET", "/sync") - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) sync_token = channel.json_body["next_batch"] # check that permission is denied for @sid1:red to get the @@ -559,7 +560,7 @@ class RoomsMemberListTestCase(RoomBase): "GET", f"/rooms/{room_id}/members?at={sync_token}", ) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) def test_get_member_list_no_permission_former_member(self) -> None: """ @@ -572,14 +573,14 @@ class RoomsMemberListTestCase(RoomBase): # check that the user can see the member list to start with channel = self.make_request("GET", "/rooms/%s/members" % room_id) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) # ban the user self.helper.change_membership(room_id, "@alice:red", self.user_id, "ban") # check the user can no longer see the member list channel = self.make_request("GET", "/rooms/%s/members" % room_id) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) def test_get_member_list_no_permission_former_member_with_at_token(self) -> None: """ @@ -593,14 +594,14 @@ class RoomsMemberListTestCase(RoomBase): # sync to get an at token channel = self.make_request("GET", "/sync") - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) sync_token = channel.json_body["next_batch"] # check that the user can see the member list to start with channel = self.make_request( "GET", "/rooms/%s/members?at=%s" % (room_id, sync_token) ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) # ban the user (Note: the user is actually allowed to see this event and # state so that they know they're banned!) @@ -612,14 +613,14 @@ class RoomsMemberListTestCase(RoomBase): # now, with the original user, sync again to get a new at token channel = self.make_request("GET", "/sync") - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) sync_token = channel.json_body["next_batch"] # check the user can no longer see the updated member list channel = self.make_request( "GET", "/rooms/%s/members?at=%s" % (room_id, sync_token) ) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) def test_get_member_list_mixed_memberships(self) -> None: room_creator = "@some_other_guy:red" @@ -628,17 +629,17 @@ class RoomsMemberListTestCase(RoomBase): self.helper.invite(room=room_id, src=room_creator, targ=self.user_id) # can't see list if you're just invited. 
channel = self.make_request("GET", room_path) - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) self.helper.join(room=room_id, user=self.user_id) # can see list now joined channel = self.make_request("GET", room_path) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.helper.leave(room=room_id, user=self.user_id) # can see old list once left channel = self.make_request("GET", room_path) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) def test_get_member_list_cancellation(self) -> None: """Test cancellation of a `/rooms/$room_id/members` request.""" @@ -651,7 +652,7 @@ class RoomsMemberListTestCase(RoomBase): "/rooms/%s/members" % room_id, ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertEqual(len(channel.json_body["chunk"]), 1) self.assertLessEqual( { @@ -671,7 +672,7 @@ class RoomsMemberListTestCase(RoomBase): # first sync to get an at token channel = self.make_request("GET", "/sync") - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) sync_token = channel.json_body["next_batch"] channel = make_request_with_cancellation_test( @@ -682,7 +683,7 @@ class RoomsMemberListTestCase(RoomBase): "/rooms/%s/members?at=%s" % (room_id, sync_token), ) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertEqual(len(channel.json_body["chunk"]), 1) self.assertLessEqual( { @@ -706,7 +707,7 @@ class RoomsCreateTestCase(RoomBase): # POST with no config keys, expect new room id channel = self.make_request("POST", "/createRoom", "{}") - self.assertEqual(200, channel.code, channel.result) + self.assertEqual(HTTPStatus.OK, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None self.assertEqual(36, channel.resource_usage.db_txn_count) @@ -719,7 +720,7 @@ class RoomsCreateTestCase(RoomBase): b'{"initial_state":[{"type": "m.bridge", "content": {}}]}', ) - self.assertEqual(200, channel.code, channel.result) + self.assertEqual(HTTPStatus.OK, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None self.assertEqual(40, channel.resource_usage.db_txn_count) @@ -727,13 +728,13 @@ class RoomsCreateTestCase(RoomBase): def test_post_room_visibility_key(self) -> None: # POST with visibility config key, expect new room id channel = self.make_request("POST", "/createRoom", b'{"visibility":"private"}') - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) self.assertTrue("room_id" in channel.json_body) def test_post_room_custom_key(self) -> None: # POST with custom config keys, expect new room id channel = self.make_request("POST", "/createRoom", b'{"custom":"stuff"}') - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) self.assertTrue("room_id" in channel.json_body) def test_post_room_known_and_unknown_keys(self) -> None: @@ -741,16 +742,16 @@ class RoomsCreateTestCase(RoomBase): channel = self.make_request( "POST", "/createRoom", b'{"visibility":"private","custom":"things"}' ) - self.assertEqual(200, channel.code) + 
self.assertEqual(HTTPStatus.OK, channel.code) self.assertTrue("room_id" in channel.json_body) def test_post_room_invalid_content(self) -> None: # POST with invalid content / paths, expect 400 channel = self.make_request("POST", "/createRoom", b'{"visibili') - self.assertEqual(400, channel.code) + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code) channel = self.make_request("POST", "/createRoom", b'["hello"]') - self.assertEqual(400, channel.code) + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code) def test_post_room_invitees_invalid_mxid(self) -> None: # POST with invalid invitee, see https://github.com/matrix-org/synapse/issues/4088 @@ -758,7 +759,7 @@ class RoomsCreateTestCase(RoomBase): channel = self.make_request( "POST", "/createRoom", b'{"invite":["@alice:example.com "]}' ) - self.assertEqual(400, channel.code) + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code) @unittest.override_config({"rc_invites": {"per_room": {"burst_count": 3}}}) def test_post_room_invitees_ratelimit(self) -> None: @@ -782,7 +783,7 @@ class RoomsCreateTestCase(RoomBase): # Test that the invites are correctly ratelimited. channel = self.make_request("POST", "/createRoom", content) - self.assertEqual(400, channel.code) + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code) self.assertEqual( "Cannot invite so many users at once", channel.json_body["error"], @@ -795,7 +796,7 @@ class RoomsCreateTestCase(RoomBase): # Test that the invites aren't ratelimited anymore. channel = self.make_request("POST", "/createRoom", content) - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) def test_spam_checker_may_join_room_deprecated(self) -> None: """Tests that the user_may_join_room spam checker callback is correctly bypassed @@ -819,7 +820,7 @@ class RoomsCreateTestCase(RoomBase): "/createRoom", {}, ) - self.assertEqual(channel.code, 200, channel.json_body) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) self.assertEqual(join_mock.call_count, 0) @@ -845,7 +846,7 @@ class RoomsCreateTestCase(RoomBase): "/createRoom", {}, ) - self.assertEqual(channel.code, 200, channel.json_body) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) self.assertEqual(join_mock.call_count, 0) @@ -865,7 +866,7 @@ class RoomsCreateTestCase(RoomBase): "/createRoom", {}, ) - self.assertEqual(channel.code, 200, channel.json_body) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) self.assertEqual(join_mock.call_count, 0) @@ -882,54 +883,68 @@ class RoomTopicTestCase(RoomBase): def test_invalid_puts(self) -> None: # missing keys or invalid json channel = self.make_request("PUT", self.path, "{}") - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", self.path, '{"_name":"bo"}') - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", self.path, '{"nao') - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request( "PUT", self.path, '[{"_name":"bo"},{"_name":"jill"}]' ) - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", 
self.path, "text only") - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", self.path, "") - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) # valid key, wrong type content = '{"topic":["Topic name"]}' channel = self.make_request("PUT", self.path, content) - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) def test_rooms_topic(self) -> None: # nothing should be there channel = self.make_request("GET", self.path) - self.assertEqual(404, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.NOT_FOUND, channel.code, msg=channel.result["body"]) # valid put content = '{"topic":"Topic name"}' channel = self.make_request("PUT", self.path, content) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) # valid get channel = self.make_request("GET", self.path) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assert_dict(json.loads(content), channel.json_body) def test_rooms_topic_with_extra_keys(self) -> None: # valid put with extra keys content = '{"topic":"Seasons","subtopic":"Summer"}' channel = self.make_request("PUT", self.path, content) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) # valid get channel = self.make_request("GET", self.path) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assert_dict(json.loads(content), channel.json_body) @@ -945,22 +960,34 @@ class RoomMemberStateTestCase(RoomBase): path = "/rooms/%s/state/m.room.member/%s" % (self.room_id, self.user_id) # missing keys or invalid json channel = self.make_request("PUT", path, "{}") - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", path, '{"_name":"bo"}') - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", path, '{"nao') - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", path, b'[{"_name":"bo"},{"_name":"jill"}]') - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", path, "text only") - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", path, "") - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) # valid keys, wrong types content = '{"membership":["%s","%s","%s"]}' % ( @@ -969,7 +996,9 @@ class 
RoomMemberStateTestCase(RoomBase): Membership.LEAVE, ) channel = self.make_request("PUT", path, content.encode("ascii")) - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) def test_rooms_members_self(self) -> None: path = "/rooms/%s/state/m.room.member/%s" % ( @@ -980,10 +1009,10 @@ class RoomMemberStateTestCase(RoomBase): # valid join message (NOOP since we made the room) content = '{"membership":"%s"}' % Membership.JOIN channel = self.make_request("PUT", path, content.encode("ascii")) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) channel = self.make_request("GET", path, content=b"") - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) expected_response = {"membership": Membership.JOIN} self.assertEqual(expected_response, channel.json_body) @@ -998,10 +1027,10 @@ class RoomMemberStateTestCase(RoomBase): # valid invite message content = '{"membership":"%s"}' % Membership.INVITE channel = self.make_request("PUT", path, content) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) channel = self.make_request("GET", path, content=b"") - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertEqual(json.loads(content), channel.json_body) def test_rooms_members_other_custom_keys(self) -> None: @@ -1017,10 +1046,10 @@ class RoomMemberStateTestCase(RoomBase): "Join us!", ) channel = self.make_request("PUT", path, content) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) channel = self.make_request("GET", path, content=b"") - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) self.assertEqual(json.loads(content), channel.json_body) @@ -1137,7 +1166,9 @@ class RoomJoinTestCase(RoomBase): # Now make the callback deny all room joins, and check that a join actually fails. return_value = False - self.helper.join(self.room3, self.user2, expect_code=403, tok=self.tok2) + self.helper.join( + self.room3, self.user2, expect_code=HTTPStatus.FORBIDDEN, tok=self.tok2 + ) def test_spam_checker_may_join_room(self) -> None: """Tests that the user_may_join_room spam checker callback is correctly called @@ -1205,7 +1236,7 @@ class RoomJoinTestCase(RoomBase): self.helper.join( self.room3, self.user2, - expect_code=403, + expect_code=HTTPStatus.FORBIDDEN, expect_errcode=return_value, tok=self.tok2, ) @@ -1216,7 +1247,7 @@ class RoomJoinTestCase(RoomBase): self.helper.join( self.room3, self.user2, - expect_code=403, + expect_code=HTTPStatus.FORBIDDEN, expect_errcode=return_value[0], tok=self.tok2, expect_additional_fields=return_value[1], @@ -1270,7 +1301,7 @@ class RoomJoinRatelimitTestCase(RoomBase): # Update the display name for the user. path = "/_matrix/client/r0/profile/%s/displayname" % self.user_id channel = self.make_request("PUT", path, {"displayname": "John Doe"}) - self.assertEqual(channel.code, 200, channel.json_body) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) # Check that all the rooms have been sent a profile update into. 
for room_id in room_ids: @@ -1335,71 +1366,93 @@ class RoomMessagesTestCase(RoomBase): path = "/rooms/%s/send/m.room.message/mid1" % (urlparse.quote(self.room_id)) # missing keys or invalid json channel = self.make_request("PUT", path, b"{}") - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", path, b'{"_name":"bo"}') - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", path, b'{"nao') - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", path, b'[{"_name":"bo"},{"_name":"jill"}]') - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", path, b"text only") - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) channel = self.make_request("PUT", path, b"") - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) def test_rooms_messages_sent(self) -> None: path = "/rooms/%s/send/m.room.message/mid1" % (urlparse.quote(self.room_id)) content = b'{"body":"test","msgtype":{"type":"a"}}' channel = self.make_request("PUT", path, content) - self.assertEqual(400, channel.code, msg=channel.result["body"]) + self.assertEqual( + HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"] + ) # custom message types content = b'{"body":"test","msgtype":"test.custom.text"}' channel = self.make_request("PUT", path, content) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) # m.text message type path = "/rooms/%s/send/m.room.message/mid2" % (urlparse.quote(self.room_id)) content = b'{"body":"test2","msgtype":"m.text"}' channel = self.make_request("PUT", path, content) - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) @parameterized.expand( [ # Allow param( - name="NOT_SPAM", value="NOT_SPAM", expected_code=200, expected_fields={} + name="NOT_SPAM", + value="NOT_SPAM", + expected_code=HTTPStatus.OK, + expected_fields={}, + ), + param( + name="False", + value=False, + expected_code=HTTPStatus.OK, + expected_fields={}, ), - param(name="False", value=False, expected_code=200, expected_fields={}), # Block param( name="scalene string", value="ANY OTHER STRING", - expected_code=403, + expected_code=HTTPStatus.FORBIDDEN, expected_fields={"errcode": "M_FORBIDDEN"}, ), param( name="True", value=True, - expected_code=403, + expected_code=HTTPStatus.FORBIDDEN, expected_fields={"errcode": "M_FORBIDDEN"}, ), param( name="Code", value=Codes.LIMIT_EXCEEDED, - expected_code=403, + expected_code=HTTPStatus.FORBIDDEN, expected_fields={"errcode": "M_LIMIT_EXCEEDED"}, ), param( name="Tuple", value=(Codes.SERVER_NOT_TRUSTED, {"additional_field": "12345"}), - expected_code=403, + expected_code=HTTPStatus.FORBIDDEN, expected_fields={ "errcode": "M_SERVER_NOT_TRUSTED", "additional_field": "12345", @@ 
-1584,7 +1637,7 @@ class RoomPowerLevelOverridesInPracticeTestCase(RoomBase): channel = self.make_request("PUT", path, "{}") # Then I am allowed - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) def test_normal_user_can_not_post_state_event(self) -> None: # Given I am a normal member of a room @@ -1598,7 +1651,7 @@ class RoomPowerLevelOverridesInPracticeTestCase(RoomBase): channel = self.make_request("PUT", path, "{}") # Then I am not allowed because state events require PL>=50 - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) self.assertEqual( "You don't have permission to post that to the room. " "user_level (0) < send_level (50)", @@ -1625,7 +1678,7 @@ class RoomPowerLevelOverridesInPracticeTestCase(RoomBase): channel = self.make_request("PUT", path, "{}") # Then I am allowed - self.assertEqual(200, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"]) @unittest.override_config( { @@ -1653,7 +1706,7 @@ class RoomPowerLevelOverridesInPracticeTestCase(RoomBase): channel = self.make_request("PUT", path, "{}") # Then I am not allowed - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) @unittest.override_config( { @@ -1681,7 +1734,7 @@ class RoomPowerLevelOverridesInPracticeTestCase(RoomBase): channel = self.make_request("PUT", path, "{}") # Then I am not allowed - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) self.assertEqual( "You don't have permission to post that to the room. " + "user_level (0) < send_level (1)", @@ -1712,7 +1765,7 @@ class RoomPowerLevelOverridesInPracticeTestCase(RoomBase): # Then I am not allowed because the public_chat config does not # affect this room, because this room is a private_chat - self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"]) self.assertEqual( "You don't have permission to post that to the room. 
" + "user_level (0) < send_level (50)", @@ -1731,7 +1784,7 @@ class RoomInitialSyncTestCase(RoomBase): def test_initial_sync(self) -> None: channel = self.make_request("GET", "/rooms/%s/initialSync" % self.room_id) - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) self.assertEqual(self.room_id, channel.json_body["room_id"]) self.assertEqual("join", channel.json_body["membership"]) @@ -1774,7 +1827,7 @@ class RoomMessageListTestCase(RoomBase): channel = self.make_request( "GET", "/rooms/%s/messages?access_token=x&from=%s" % (self.room_id, token) ) - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) self.assertTrue("start" in channel.json_body) self.assertEqual(token, channel.json_body["start"]) self.assertTrue("chunk" in channel.json_body) @@ -1785,7 +1838,7 @@ class RoomMessageListTestCase(RoomBase): channel = self.make_request( "GET", "/rooms/%s/messages?access_token=x&from=%s" % (self.room_id, token) ) - self.assertEqual(200, channel.code) + self.assertEqual(HTTPStatus.OK, channel.code) self.assertTrue("start" in channel.json_body) self.assertEqual(token, channel.json_body["start"]) self.assertTrue("chunk" in channel.json_body) @@ -1824,7 +1877,7 @@ class RoomMessageListTestCase(RoomBase): json.dumps({"types": [EventTypes.Message]}), ), ) - self.assertEqual(channel.code, 200, channel.json_body) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) chunk = channel.json_body["chunk"] self.assertEqual(len(chunk), 2, [event["content"] for event in chunk]) @@ -1852,7 +1905,7 @@ class RoomMessageListTestCase(RoomBase): json.dumps({"types": [EventTypes.Message]}), ), ) - self.assertEqual(channel.code, 200, channel.json_body) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) chunk = channel.json_body["chunk"] self.assertEqual(len(chunk), 1, [event["content"] for event in chunk]) @@ -1869,7 +1922,7 @@ class RoomMessageListTestCase(RoomBase): json.dumps({"types": [EventTypes.Message]}), ), ) - self.assertEqual(channel.code, 200, channel.json_body) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) chunk = channel.json_body["chunk"] self.assertEqual(len(chunk), 0, [event["content"] for event in chunk]) @@ -1997,14 +2050,14 @@ class PublicRoomsRestrictedTestCase(unittest.HomeserverTestCase): def test_restricted_no_auth(self) -> None: channel = self.make_request("GET", self.url) - self.assertEqual(channel.code, 401, channel.result) + self.assertEqual(channel.code, HTTPStatus.UNAUTHORIZED, channel.result) def test_restricted_auth(self) -> None: self.register_user("user", "pass") tok = self.login("user", "pass") channel = self.make_request("GET", self.url, access_token=tok) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) class PublicRoomsRoomTypeFilterTestCase(unittest.HomeserverTestCase): @@ -2123,7 +2176,7 @@ class PublicRoomsTestRemoteSearchFallbackTestCase(unittest.HomeserverTestCase): content={"filter": search_filter}, access_token=self.token, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self.federation_client.get_public_rooms.assert_called_once_with( # type: ignore[attr-defined] "testserv", @@ -2140,7 +2193,7 @@ class PublicRoomsTestRemoteSearchFallbackTestCase(unittest.HomeserverTestCase): # The `get_public_rooms` should be called again if the first call fails # with a 404, when using search filters. 
self.federation_client.get_public_rooms.side_effect = ( # type: ignore[attr-defined] - HttpResponseException(404, "Not Found", b""), + HttpResponseException(HTTPStatus.NOT_FOUND, "Not Found", b""), make_awaitable({}), ) @@ -2152,7 +2205,7 @@ class PublicRoomsTestRemoteSearchFallbackTestCase(unittest.HomeserverTestCase): content={"filter": search_filter}, access_token=self.token, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self.federation_client.get_public_rooms.assert_has_calls( # type: ignore[attr-defined] [ @@ -2206,7 +2259,7 @@ class PerRoomProfilesForbiddenTestCase(unittest.HomeserverTestCase): request_data, access_token=self.tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self.room_id = self.helper.create_room_as(self.user_id, tok=self.tok) @@ -2220,7 +2273,7 @@ class PerRoomProfilesForbiddenTestCase(unittest.HomeserverTestCase): request_data, access_token=self.tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) event_id = channel.json_body["event_id"] channel = self.make_request( @@ -2228,7 +2281,7 @@ class PerRoomProfilesForbiddenTestCase(unittest.HomeserverTestCase): "/_matrix/client/r0/rooms/%s/event/%s" % (self.room_id, event_id), access_token=self.tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) res_displayname = channel.json_body["content"]["displayname"] self.assertEqual(res_displayname, self.displayname, channel.result) @@ -2262,7 +2315,7 @@ class RoomMembershipReasonTestCase(unittest.HomeserverTestCase): content={"reason": reason}, access_token=self.second_tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self._check_for_reason(reason) @@ -2276,7 +2329,7 @@ class RoomMembershipReasonTestCase(unittest.HomeserverTestCase): content={"reason": reason}, access_token=self.second_tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self._check_for_reason(reason) @@ -2290,7 +2343,7 @@ class RoomMembershipReasonTestCase(unittest.HomeserverTestCase): content={"reason": reason, "user_id": self.second_user_id}, access_token=self.second_tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self._check_for_reason(reason) @@ -2304,7 +2357,7 @@ class RoomMembershipReasonTestCase(unittest.HomeserverTestCase): content={"reason": reason, "user_id": self.second_user_id}, access_token=self.creator_tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self._check_for_reason(reason) @@ -2316,7 +2369,7 @@ class RoomMembershipReasonTestCase(unittest.HomeserverTestCase): content={"reason": reason, "user_id": self.second_user_id}, access_token=self.creator_tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self._check_for_reason(reason) @@ -2328,7 +2381,7 @@ class RoomMembershipReasonTestCase(unittest.HomeserverTestCase): content={"reason": reason, "user_id": self.second_user_id}, access_token=self.creator_tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) 
self._check_for_reason(reason) @@ -2347,7 +2400,7 @@ class RoomMembershipReasonTestCase(unittest.HomeserverTestCase): content={"reason": reason}, access_token=self.second_tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) self._check_for_reason(reason) @@ -2359,7 +2412,7 @@ class RoomMembershipReasonTestCase(unittest.HomeserverTestCase): ), access_token=self.creator_tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) event_content = channel.json_body @@ -2407,7 +2460,7 @@ class LabelsTestCase(unittest.HomeserverTestCase): % (self.room_id, event_id, json.dumps(self.FILTER_LABELS)), access_token=self.tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) events_before = channel.json_body["events_before"] @@ -2437,7 +2490,7 @@ class LabelsTestCase(unittest.HomeserverTestCase): % (self.room_id, event_id, json.dumps(self.FILTER_NOT_LABELS)), access_token=self.tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) events_before = channel.json_body["events_before"] @@ -2472,7 +2525,7 @@ class LabelsTestCase(unittest.HomeserverTestCase): % (self.room_id, event_id, json.dumps(self.FILTER_LABELS_NOT_LABELS)), access_token=self.tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) events_before = channel.json_body["events_before"] @@ -2820,7 +2873,7 @@ class RelationsTestCase(unittest.HomeserverTestCase): "/rooms/%s/messages?filter=%s&dir=b" % (self.room_id, json.dumps(filter)), access_token=self.tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) return channel.json_body["chunk"] @@ -2925,7 +2978,7 @@ class ContextTestCase(unittest.HomeserverTestCase): % (self.room_id, event_id), access_token=self.tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) events_before = channel.json_body["events_before"] @@ -2991,7 +3044,7 @@ class ContextTestCase(unittest.HomeserverTestCase): % (self.room_id, event_id), access_token=invited_tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) events_before = channel.json_body["events_before"] From 5d4028f217f178fcd384d5bfddd92225b4e78c51 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Sun, 17 Jul 2022 23:19:43 +0200 Subject: [PATCH 138/178] Make all `process_replication_rows` methods async (#13304) More prep work for asynchronous caching; this also makes all `process_replication_rows` methods consistent (the presence handler's implementation already is).
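As an illustration of the pattern applied throughout this patch, here is a minimal sketch (class and stream names are placeholders rather than the real Synapse signatures; the actual changes are in the diff below):

```python
from typing import Any, Iterable


class BaseReplicationStore:
    # Stand-in for the shared base class whose hook becomes a coroutine.
    async def process_replication_rows(
        self, stream_name: str, instance_name: str, token: int, rows: Iterable[Any]
    ) -> None:
        # Being async leaves room for awaiting cache invalidation here later.
        return None


class ExampleWorkerStore(BaseReplicationStore):
    async def process_replication_rows(
        self, stream_name: str, instance_name: str, token: int, rows: Iterable[Any]
    ) -> None:
        # Do the per-store bookkeeping first, then await the parent hook
        # instead of calling it synchronously as before this patch.
        for _row in rows:
            pass  # e.g. invalidate per-user caches keyed on _row.user_id
        return await super().process_replication_rows(
            stream_name, instance_name, token, rows
        )
```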
Signed off by Nick @ Beeper (@Fizzadar) --- changelog.d/13304.misc | 1 + synapse/handlers/typing.py | 4 ++-- synapse/replication/slave/storage/devices.py | 6 ++++-- synapse/replication/slave/storage/push_rule.py | 6 ++++-- synapse/replication/slave/storage/pushers.py | 6 ++++-- synapse/replication/tcp/client.py | 6 ++++-- synapse/storage/_base.py | 2 +- synapse/storage/databases/main/account_data.py | 4 ++-- synapse/storage/databases/main/cache.py | 4 ++-- synapse/storage/databases/main/deviceinbox.py | 6 ++++-- synapse/storage/databases/main/events_worker.py | 4 ++-- synapse/storage/databases/main/presence.py | 6 ++++-- synapse/storage/databases/main/receipts.py | 6 ++++-- synapse/storage/databases/main/tags.py | 4 ++-- 14 files changed, 40 insertions(+), 25 deletions(-) create mode 100644 changelog.d/13304.misc diff --git a/changelog.d/13304.misc b/changelog.d/13304.misc new file mode 100644 index 0000000000..156d3d71d7 --- /dev/null +++ b/changelog.d/13304.misc @@ -0,0 +1 @@ +Make all replication row processing methods asynchronous. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index d104ea07fe..26edeeca3c 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -158,7 +158,7 @@ class FollowerTypingHandler: except Exception: logger.exception("Error pushing typing notif to remotes") - def process_replication_rows( + async def process_replication_rows( self, token: int, rows: List[TypingStream.TypingStreamRow] ) -> None: """Should be called whenever we receive updates for typing stream.""" @@ -444,7 +444,7 @@ class TypingWriterHandler(FollowerTypingHandler): return rows, current_id, limited - def process_replication_rows( + async def process_replication_rows( self, token: int, rows: List[TypingStream.TypingStreamRow] ) -> None: # The writing process should never get updates from replication. 
diff --git a/synapse/replication/slave/storage/devices.py b/synapse/replication/slave/storage/devices.py index a48cc02069..22f7999721 100644 --- a/synapse/replication/slave/storage/devices.py +++ b/synapse/replication/slave/storage/devices.py @@ -49,7 +49,7 @@ class SlavedDeviceStore(DeviceWorkerStore, BaseSlavedStore): def get_device_stream_token(self) -> int: return self._device_list_id_gen.get_current_token() - def process_replication_rows( + async def process_replication_rows( self, stream_name: str, instance_name: str, token: int, rows: Iterable[Any] ) -> None: if stream_name == DeviceListsStream.NAME: @@ -59,7 +59,9 @@ class SlavedDeviceStore(DeviceWorkerStore, BaseSlavedStore): self._device_list_id_gen.advance(instance_name, token) for row in rows: self._user_signature_stream_cache.entity_has_changed(row.user_id, token) - return super().process_replication_rows(stream_name, instance_name, token, rows) + return await super().process_replication_rows( + stream_name, instance_name, token, rows + ) def _invalidate_caches_for_devices( self, token: int, rows: Iterable[DeviceListsStream.DeviceListsStreamRow] diff --git a/synapse/replication/slave/storage/push_rule.py b/synapse/replication/slave/storage/push_rule.py index 52ee3f7e58..e1838a81a9 100644 --- a/synapse/replication/slave/storage/push_rule.py +++ b/synapse/replication/slave/storage/push_rule.py @@ -24,7 +24,7 @@ class SlavedPushRuleStore(SlavedEventStore, PushRulesWorkerStore): def get_max_push_rules_stream_id(self) -> int: return self._push_rules_stream_id_gen.get_current_token() - def process_replication_rows( + async def process_replication_rows( self, stream_name: str, instance_name: str, token: int, rows: Iterable[Any] ) -> None: if stream_name == PushRulesStream.NAME: @@ -33,4 +33,6 @@ class SlavedPushRuleStore(SlavedEventStore, PushRulesWorkerStore): self.get_push_rules_for_user.invalidate((row.user_id,)) self.get_push_rules_enabled_for_user.invalidate((row.user_id,)) self.push_rules_stream_cache.entity_has_changed(row.user_id, token) - return super().process_replication_rows(stream_name, instance_name, token, rows) + return await super().process_replication_rows( + stream_name, instance_name, token, rows + ) diff --git a/synapse/replication/slave/storage/pushers.py b/synapse/replication/slave/storage/pushers.py index de642bba71..fb3f5653af 100644 --- a/synapse/replication/slave/storage/pushers.py +++ b/synapse/replication/slave/storage/pushers.py @@ -40,9 +40,11 @@ class SlavedPusherStore(PusherWorkerStore, BaseSlavedStore): def get_pushers_stream_token(self) -> int: return self._pushers_id_gen.get_current_token() - def process_replication_rows( + async def process_replication_rows( self, stream_name: str, instance_name: str, token: int, rows: Iterable[Any] ) -> None: if stream_name == PushersStream.NAME: self._pushers_id_gen.advance(instance_name, token) - return super().process_replication_rows(stream_name, instance_name, token, rows) + return await super().process_replication_rows( + stream_name, instance_name, token, rows + ) diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 2f59245058..f9722ccb4f 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -144,13 +144,15 @@ class ReplicationDataHandler: token: stream token for this batch of rows rows: a list of Stream.ROW_TYPE objects as returned by Stream.parse_row. 
""" - self.store.process_replication_rows(stream_name, instance_name, token, rows) + await self.store.process_replication_rows( + stream_name, instance_name, token, rows + ) if self.send_handler: await self.send_handler.process_replication_rows(stream_name, token, rows) if stream_name == TypingStream.NAME: - self._typing_handler.process_replication_rows(token, rows) + await self._typing_handler.process_replication_rows(token, rows) self.notifier.on_new_event( StreamKeyType.TYPING, token, rooms=[row.room_id for row in rows] ) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index b8c8dcd76b..822108e83b 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -47,7 +47,7 @@ class SQLBaseStore(metaclass=ABCMeta): self.database_engine = database.engine self.db_pool = database - def process_replication_rows( + async def process_replication_rows( self, stream_name: str, instance_name: str, diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py index 9af9f4f18e..337b22294e 100644 --- a/synapse/storage/databases/main/account_data.py +++ b/synapse/storage/databases/main/account_data.py @@ -414,7 +414,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) ) ) - def process_replication_rows( + async def process_replication_rows( self, stream_name: str, instance_name: str, @@ -437,7 +437,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) ) self._account_data_stream_cache.entity_has_changed(row.user_id, token) - super().process_replication_rows(stream_name, instance_name, token, rows) + await super().process_replication_rows(stream_name, instance_name, token, rows) async def add_account_data_to_room( self, user_id: str, room_id: str, account_data_type: str, content: JsonDict diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index 2367ddeea3..048ff3e1b7 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -119,7 +119,7 @@ class CacheInvalidationWorkerStore(SQLBaseStore): "get_all_updated_caches", get_all_updated_caches_txn ) - def process_replication_rows( + async def process_replication_rows( self, stream_name: str, instance_name: str, token: int, rows: Iterable[Any] ) -> None: if stream_name == EventsStream.NAME: @@ -154,7 +154,7 @@ class CacheInvalidationWorkerStore(SQLBaseStore): else: self._attempt_to_invalidate_cache(row.cache_func, row.keys) - super().process_replication_rows(stream_name, instance_name, token, rows) + await super().process_replication_rows(stream_name, instance_name, token, rows) def _process_event_stream_row(self, token: int, row: EventsStreamRow) -> None: data = row.data diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index 422e0e65ca..45fe58c104 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py @@ -128,7 +128,7 @@ class DeviceInboxWorkerStore(SQLBaseStore): prefilled_cache=device_outbox_prefill, ) - def process_replication_rows( + async def process_replication_rows( self, stream_name: str, instance_name: str, @@ -148,7 +148,9 @@ class DeviceInboxWorkerStore(SQLBaseStore): self._device_federation_outbox_stream_cache.entity_has_changed( row.entity, token ) - return super().process_replication_rows(stream_name, instance_name, token, rows) + return await super().process_replication_rows( + stream_name, instance_name, 
token, rows + ) def get_to_device_stream_token(self) -> int: return self._device_inbox_id_gen.get_current_token() diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index f3935bfead..5310d4eda2 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -280,7 +280,7 @@ class EventsWorkerStore(SQLBaseStore): id_column="chain_id", ) - def process_replication_rows( + async def process_replication_rows( self, stream_name: str, instance_name: str, @@ -292,7 +292,7 @@ class EventsWorkerStore(SQLBaseStore): elif stream_name == BackfillStream.NAME: self._backfill_id_gen.advance(instance_name, -token) - super().process_replication_rows(stream_name, instance_name, token, rows) + await super().process_replication_rows(stream_name, instance_name, token, rows) async def have_censored_event(self, event_id: str) -> bool: """Check if an event has been censored, i.e. if the content of the event has been erased diff --git a/synapse/storage/databases/main/presence.py b/synapse/storage/databases/main/presence.py index 9769a18a9d..9fe3124b35 100644 --- a/synapse/storage/databases/main/presence.py +++ b/synapse/storage/databases/main/presence.py @@ -431,7 +431,7 @@ class PresenceStore(PresenceBackgroundUpdateStore, CacheInvalidationWorkerStore) self._presence_on_startup = [] return active_on_startup - def process_replication_rows( + async def process_replication_rows( self, stream_name: str, instance_name: str, @@ -443,4 +443,6 @@ class PresenceStore(PresenceBackgroundUpdateStore, CacheInvalidationWorkerStore) for row in rows: self.presence_stream_cache.entity_has_changed(row.user_id, token) self._get_presence_for_user.invalidate((row.user_id,)) - return super().process_replication_rows(stream_name, instance_name, token, rows) + return await super().process_replication_rows( + stream_name, instance_name, token, rows + ) diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index 0090c9f225..f85862d968 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -589,7 +589,7 @@ class ReceiptsWorkerStore(SQLBaseStore): "get_unread_event_push_actions_by_room_for_user", (room_id,) ) - def process_replication_rows( + async def process_replication_rows( self, stream_name: str, instance_name: str, @@ -604,7 +604,9 @@ class ReceiptsWorkerStore(SQLBaseStore): ) self._receipts_stream_cache.entity_has_changed(row.room_id, token) - return super().process_replication_rows(stream_name, instance_name, token, rows) + return await super().process_replication_rows( + stream_name, instance_name, token, rows + ) def _insert_linearized_receipt_txn( self, diff --git a/synapse/storage/databases/main/tags.py b/synapse/storage/databases/main/tags.py index b0f5de67a3..5e8905369c 100644 --- a/synapse/storage/databases/main/tags.py +++ b/synapse/storage/databases/main/tags.py @@ -292,7 +292,7 @@ class TagsWorkerStore(AccountDataWorkerStore): # than the id that the client has. 
pass - def process_replication_rows( + async def process_replication_rows( self, stream_name: str, instance_name: str, @@ -305,7 +305,7 @@ class TagsWorkerStore(AccountDataWorkerStore): self.get_tags_for_user.invalidate((row.user_id,)) self._account_data_stream_cache.entity_has_changed(row.user_id, token) - super().process_replication_rows(stream_name, instance_name, token, rows) + await super().process_replication_rows(stream_name, instance_name, token, rows) class TagsStore(TagsWorkerStore): From efee345b454ac5e6aeb4b4128793be1fbc308b91 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Sun, 17 Jul 2022 23:28:45 +0200 Subject: [PATCH 139/178] Remove unnecessary `json.dumps` from tests (#13303) --- changelog.d/13303.misc | 1 + tests/rest/client/test_account.py | 23 ++--- tests/rest/client/test_directory.py | 16 ++-- tests/rest/client/test_identity.py | 4 +- tests/rest/client/test_login.py | 3 +- tests/rest/client/test_password_policy.py | 31 +++---- tests/rest/client/test_register.py | 103 ++++++++++------------ tests/rest/client/test_report_event.py | 7 +- tests/rest/client/test_rooms.py | 74 +++++++--------- tests/rest/client/test_sync.py | 3 +- tests/rest/client/utils.py | 8 +- tests/test_terms_auth.py | 39 ++++---- tests/unittest.py | 31 +++---- 13 files changed, 143 insertions(+), 200 deletions(-) create mode 100644 changelog.d/13303.misc diff --git a/changelog.d/13303.misc b/changelog.d/13303.misc new file mode 100644 index 0000000000..03f64ab171 --- /dev/null +++ b/changelog.d/13303.misc @@ -0,0 +1 @@ +Remove unnecessary `json.dumps` from tests. \ No newline at end of file diff --git a/tests/rest/client/test_account.py b/tests/rest/client/test_account.py index 6d6a26b8f4..7ae926dc9c 100644 --- a/tests/rest/client/test_account.py +++ b/tests/rest/client/test_account.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import json import os import re from email.parser import Parser @@ -96,9 +95,7 @@ class PasswordResetTestCase(unittest.HomeserverTestCase): """ body = {"type": "m.login.password", "user": username, "password": password} - channel = self.make_request( - "POST", "/_matrix/client/r0/login", json.dumps(body).encode("utf8") - ) + channel = self.make_request("POST", "/_matrix/client/r0/login", body) self.assertEqual(channel.code, HTTPStatus.FORBIDDEN, channel.result) def test_basic_password_reset(self) -> None: @@ -480,16 +477,14 @@ class DeactivateTestCase(unittest.HomeserverTestCase): self.assertEqual(memberships[0].room_id, room_id, memberships) def deactivate(self, user_id: str, tok: str) -> None: - request_data = json.dumps( - { - "auth": { - "type": "m.login.password", - "user": user_id, - "password": "test", - }, - "erase": False, - } - ) + request_data = { + "auth": { + "type": "m.login.password", + "user": user_id, + "password": "test", + }, + "erase": False, + } channel = self.make_request( "POST", "account/deactivate", request_data, access_token=tok ) diff --git a/tests/rest/client/test_directory.py b/tests/rest/client/test_directory.py index 16e7ef41bc..7a88aa2cda 100644 --- a/tests/rest/client/test_directory.py +++ b/tests/rest/client/test_directory.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import json from http import HTTPStatus from twisted.test.proto_helpers import MemoryReactor @@ -97,8 +96,7 @@ class DirectoryTestCase(unittest.HomeserverTestCase): # We use deliberately a localpart under the length threshold so # that we can make sure that the check is done on the whole alias. - data = {"room_alias_name": random_string(256 - len(self.hs.hostname))} - request_data = json.dumps(data) + request_data = {"room_alias_name": random_string(256 - len(self.hs.hostname))} channel = self.make_request( "POST", url, request_data, access_token=self.user_tok ) @@ -110,8 +108,7 @@ class DirectoryTestCase(unittest.HomeserverTestCase): # Check with an alias of allowed length. There should already be # a test that ensures it works in test_register.py, but let's be # as cautious as possible here. - data = {"room_alias_name": random_string(5)} - request_data = json.dumps(data) + request_data = {"room_alias_name": random_string(5)} channel = self.make_request( "POST", url, request_data, access_token=self.user_tok ) @@ -144,8 +141,7 @@ class DirectoryTestCase(unittest.HomeserverTestCase): # Add an alias for the room, as the appservice alias = RoomAlias(f"asns-{random_string(5)}", self.hs.hostname).to_string() - data = {"room_id": self.room_id} - request_data = json.dumps(data) + request_data = {"room_id": self.room_id} channel = self.make_request( "PUT", @@ -193,8 +189,7 @@ class DirectoryTestCase(unittest.HomeserverTestCase): self.hs.hostname, ) - data = {"aliases": [self.random_alias(alias_length)]} - request_data = json.dumps(data) + request_data = {"aliases": [self.random_alias(alias_length)]} channel = self.make_request( "PUT", url, request_data, access_token=self.user_tok @@ -206,8 +201,7 @@ class DirectoryTestCase(unittest.HomeserverTestCase): ) -> str: alias = self.random_alias(alias_length) url = "/_matrix/client/r0/directory/room/%s" % alias - data = {"room_id": self.room_id} - request_data = json.dumps(data) + request_data = {"room_id": self.room_id} channel = self.make_request( "PUT", url, request_data, access_token=self.user_tok diff --git a/tests/rest/client/test_identity.py b/tests/rest/client/test_identity.py index 299b9d21e2..dc17c9d113 100644 --- a/tests/rest/client/test_identity.py +++ b/tests/rest/client/test_identity.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json from http import HTTPStatus from twisted.test.proto_helpers import MemoryReactor @@ -51,12 +50,11 @@ class IdentityTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.code, HTTPStatus.OK, channel.result) room_id = channel.json_body["room_id"] - params = { + request_data = { "id_server": "testis", "medium": "email", "address": "test@example.com", } - request_data = json.dumps(params) request_url = ("/rooms/%s/invite" % (room_id)).encode("ascii") channel = self.make_request( b"POST", request_url, request_data, access_token=tok diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py index e7f5517e34..a2958f6959 100644 --- a/tests/rest/client/test_login.py +++ b/tests/rest/client/test_login.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import json import time import urllib.parse from http import HTTPStatus @@ -400,7 +399,7 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): channel = self.make_request( "POST", "/_matrix/client/v3/login", - json.dumps(body).encode("utf8"), + body, custom_headers=None, ) diff --git a/tests/rest/client/test_password_policy.py b/tests/rest/client/test_password_policy.py index 3a74d2e96c..e19d21d6ee 100644 --- a/tests/rest/client/test_password_policy.py +++ b/tests/rest/client/test_password_policy.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json from http import HTTPStatus from twisted.test.proto_helpers import MemoryReactor @@ -89,7 +88,7 @@ class PasswordPolicyTestCase(unittest.HomeserverTestCase): ) def test_password_too_short(self) -> None: - request_data = json.dumps({"username": "kermit", "password": "shorty"}) + request_data = {"username": "kermit", "password": "shorty"} channel = self.make_request("POST", self.register_url, request_data) self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) @@ -100,7 +99,7 @@ class PasswordPolicyTestCase(unittest.HomeserverTestCase): ) def test_password_no_digit(self) -> None: - request_data = json.dumps({"username": "kermit", "password": "longerpassword"}) + request_data = {"username": "kermit", "password": "longerpassword"} channel = self.make_request("POST", self.register_url, request_data) self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) @@ -111,7 +110,7 @@ class PasswordPolicyTestCase(unittest.HomeserverTestCase): ) def test_password_no_symbol(self) -> None: - request_data = json.dumps({"username": "kermit", "password": "l0ngerpassword"}) + request_data = {"username": "kermit", "password": "l0ngerpassword"} channel = self.make_request("POST", self.register_url, request_data) self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) @@ -122,7 +121,7 @@ class PasswordPolicyTestCase(unittest.HomeserverTestCase): ) def test_password_no_uppercase(self) -> None: - request_data = json.dumps({"username": "kermit", "password": "l0ngerpassword!"}) + request_data = {"username": "kermit", "password": "l0ngerpassword!"} channel = self.make_request("POST", self.register_url, request_data) self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) @@ -133,7 +132,7 @@ class PasswordPolicyTestCase(unittest.HomeserverTestCase): ) def test_password_no_lowercase(self) -> None: - request_data = json.dumps({"username": "kermit", "password": "L0NGERPASSWORD!"}) + request_data = {"username": "kermit", "password": "L0NGERPASSWORD!"} channel = self.make_request("POST", self.register_url, request_data) self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) @@ -144,7 +143,7 @@ class PasswordPolicyTestCase(unittest.HomeserverTestCase): ) def test_password_compliant(self) -> None: - request_data = json.dumps({"username": "kermit", "password": "L0ngerpassword!"}) + request_data = {"username": "kermit", "password": "L0ngerpassword!"} channel = self.make_request("POST", self.register_url, request_data) # Getting a 401 here means the password has passed validation and the server has @@ -161,16 +160,14 @@ class PasswordPolicyTestCase(unittest.HomeserverTestCase): user_id = self.register_user("kermit", compliant_password) tok = self.login("kermit", compliant_password) - request_data = json.dumps( - { - "new_password": not_compliant_password, - "auth": { - "password": compliant_password, - "type": 
LoginType.PASSWORD, - "user": user_id, - }, - } - ) + request_data = { + "new_password": not_compliant_password, + "auth": { + "password": compliant_password, + "type": LoginType.PASSWORD, + "user": user_id, + }, + } channel = self.make_request( "POST", "/_matrix/client/r0/account/password", diff --git a/tests/rest/client/test_register.py b/tests/rest/client/test_register.py index cb27458746..071b488cc0 100644 --- a/tests/rest/client/test_register.py +++ b/tests/rest/client/test_register.py @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. import datetime -import json import os from typing import Any, Dict, List, Tuple @@ -62,9 +61,10 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): ) self.hs.get_datastores().main.services_cache.append(appservice) - request_data = json.dumps( - {"username": "as_user_kermit", "type": APP_SERVICE_REGISTRATION_TYPE} - ) + request_data = { + "username": "as_user_kermit", + "type": APP_SERVICE_REGISTRATION_TYPE, + } channel = self.make_request( b"POST", self.url + b"?access_token=i_am_an_app_service", request_data @@ -85,7 +85,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): ) self.hs.get_datastores().main.services_cache.append(appservice) - request_data = json.dumps({"username": "as_user_kermit"}) + request_data = {"username": "as_user_kermit"} channel = self.make_request( b"POST", self.url + b"?access_token=i_am_an_app_service", request_data @@ -95,9 +95,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): def test_POST_appservice_registration_invalid(self) -> None: self.appservice = None # no application service exists - request_data = json.dumps( - {"username": "kermit", "type": APP_SERVICE_REGISTRATION_TYPE} - ) + request_data = {"username": "kermit", "type": APP_SERVICE_REGISTRATION_TYPE} channel = self.make_request( b"POST", self.url + b"?access_token=i_am_an_app_service", request_data ) @@ -105,14 +103,14 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.result["code"], b"401", channel.result) def test_POST_bad_password(self) -> None: - request_data = json.dumps({"username": "kermit", "password": 666}) + request_data = {"username": "kermit", "password": 666} channel = self.make_request(b"POST", self.url, request_data) self.assertEqual(channel.result["code"], b"400", channel.result) self.assertEqual(channel.json_body["error"], "Invalid password") def test_POST_bad_username(self) -> None: - request_data = json.dumps({"username": 777, "password": "monkey"}) + request_data = {"username": 777, "password": "monkey"} channel = self.make_request(b"POST", self.url, request_data) self.assertEqual(channel.result["code"], b"400", channel.result) @@ -121,13 +119,12 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): def test_POST_user_valid(self) -> None: user_id = "@kermit:test" device_id = "frogfone" - params = { + request_data = { "username": "kermit", "password": "monkey", "device_id": device_id, "auth": {"type": LoginType.DUMMY}, } - request_data = json.dumps(params) channel = self.make_request(b"POST", self.url, request_data) det_data = { @@ -140,7 +137,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): @override_config({"enable_registration": False}) def test_POST_disabled_registration(self) -> None: - request_data = json.dumps({"username": "kermit", "password": "monkey"}) + request_data = {"username": "kermit", "password": "monkey"} self.auth_result = (None, 
{"username": "kermit", "password": "monkey"}, None) channel = self.make_request(b"POST", self.url, request_data) @@ -188,13 +185,12 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): @override_config({"rc_registration": {"per_second": 0.17, "burst_count": 5}}) def test_POST_ratelimiting(self) -> None: for i in range(0, 6): - params = { + request_data = { "username": "kermit" + str(i), "password": "monkey", "device_id": "frogfone", "auth": {"type": LoginType.DUMMY}, } - request_data = json.dumps(params) channel = self.make_request(b"POST", self.url, request_data) if i == 5: @@ -234,7 +230,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): } # Request without auth to get flows and session - channel = self.make_request(b"POST", self.url, json.dumps(params)) + channel = self.make_request(b"POST", self.url, params) self.assertEqual(channel.result["code"], b"401", channel.result) flows = channel.json_body["flows"] # Synapse adds a dummy stage to differentiate flows where otherwise one @@ -251,8 +247,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): "token": token, "session": session, } - request_data = json.dumps(params) - channel = self.make_request(b"POST", self.url, request_data) + channel = self.make_request(b"POST", self.url, params) self.assertEqual(channel.result["code"], b"401", channel.result) completed = channel.json_body["completed"] self.assertCountEqual([LoginType.REGISTRATION_TOKEN], completed) @@ -262,8 +257,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): "type": LoginType.DUMMY, "session": session, } - request_data = json.dumps(params) - channel = self.make_request(b"POST", self.url, request_data) + channel = self.make_request(b"POST", self.url, params) det_data = { "user_id": f"@{username}:{self.hs.hostname}", "home_server": self.hs.hostname, @@ -290,7 +284,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): "password": "monkey", } # Request without auth to get session - channel = self.make_request(b"POST", self.url, json.dumps(params)) + channel = self.make_request(b"POST", self.url, params) session = channel.json_body["session"] # Test with token param missing (invalid) @@ -298,21 +292,21 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): "type": LoginType.REGISTRATION_TOKEN, "session": session, } - channel = self.make_request(b"POST", self.url, json.dumps(params)) + channel = self.make_request(b"POST", self.url, params) self.assertEqual(channel.result["code"], b"401", channel.result) self.assertEqual(channel.json_body["errcode"], Codes.MISSING_PARAM) self.assertEqual(channel.json_body["completed"], []) # Test with non-string (invalid) params["auth"]["token"] = 1234 - channel = self.make_request(b"POST", self.url, json.dumps(params)) + channel = self.make_request(b"POST", self.url, params) self.assertEqual(channel.result["code"], b"401", channel.result) self.assertEqual(channel.json_body["errcode"], Codes.INVALID_PARAM) self.assertEqual(channel.json_body["completed"], []) # Test with unknown token (invalid) params["auth"]["token"] = "1234" - channel = self.make_request(b"POST", self.url, json.dumps(params)) + channel = self.make_request(b"POST", self.url, params) self.assertEqual(channel.result["code"], b"401", channel.result) self.assertEqual(channel.json_body["errcode"], Codes.UNAUTHORIZED) self.assertEqual(channel.json_body["completed"], []) @@ -337,9 +331,9 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): params1: JsonDict = {"username": "bert", 
"password": "monkey"} params2: JsonDict = {"username": "ernie", "password": "monkey"} # Do 2 requests without auth to get two session IDs - channel1 = self.make_request(b"POST", self.url, json.dumps(params1)) + channel1 = self.make_request(b"POST", self.url, params1) session1 = channel1.json_body["session"] - channel2 = self.make_request(b"POST", self.url, json.dumps(params2)) + channel2 = self.make_request(b"POST", self.url, params2) session2 = channel2.json_body["session"] # Use token with session1 and check `pending` is 1 @@ -348,9 +342,9 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): "token": token, "session": session1, } - self.make_request(b"POST", self.url, json.dumps(params1)) + self.make_request(b"POST", self.url, params1) # Repeat request to make sure pending isn't increased again - self.make_request(b"POST", self.url, json.dumps(params1)) + self.make_request(b"POST", self.url, params1) pending = self.get_success( store.db_pool.simple_select_one_onecol( "registration_tokens", @@ -366,14 +360,14 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): "token": token, "session": session2, } - channel = self.make_request(b"POST", self.url, json.dumps(params2)) + channel = self.make_request(b"POST", self.url, params2) self.assertEqual(channel.result["code"], b"401", channel.result) self.assertEqual(channel.json_body["errcode"], Codes.UNAUTHORIZED) self.assertEqual(channel.json_body["completed"], []) # Complete registration with session1 params1["auth"]["type"] = LoginType.DUMMY - self.make_request(b"POST", self.url, json.dumps(params1)) + self.make_request(b"POST", self.url, params1) # Check pending=0 and completed=1 res = self.get_success( store.db_pool.simple_select_one( @@ -386,7 +380,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): self.assertEqual(res["completed"], 1) # Check auth still fails when using token with session2 - channel = self.make_request(b"POST", self.url, json.dumps(params2)) + channel = self.make_request(b"POST", self.url, params2) self.assertEqual(channel.result["code"], b"401", channel.result) self.assertEqual(channel.json_body["errcode"], Codes.UNAUTHORIZED) self.assertEqual(channel.json_body["completed"], []) @@ -411,7 +405,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): ) params: JsonDict = {"username": "kermit", "password": "monkey"} # Request without auth to get session - channel = self.make_request(b"POST", self.url, json.dumps(params)) + channel = self.make_request(b"POST", self.url, params) session = channel.json_body["session"] # Check authentication fails with expired token @@ -420,7 +414,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): "token": token, "session": session, } - channel = self.make_request(b"POST", self.url, json.dumps(params)) + channel = self.make_request(b"POST", self.url, params) self.assertEqual(channel.result["code"], b"401", channel.result) self.assertEqual(channel.json_body["errcode"], Codes.UNAUTHORIZED) self.assertEqual(channel.json_body["completed"], []) @@ -435,7 +429,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): ) # Check authentication succeeds - channel = self.make_request(b"POST", self.url, json.dumps(params)) + channel = self.make_request(b"POST", self.url, params) completed = channel.json_body["completed"] self.assertCountEqual([LoginType.REGISTRATION_TOKEN], completed) @@ -460,9 +454,9 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): # Do 2 requests without auth to get two session IDs 
params1: JsonDict = {"username": "bert", "password": "monkey"} params2: JsonDict = {"username": "ernie", "password": "monkey"} - channel1 = self.make_request(b"POST", self.url, json.dumps(params1)) + channel1 = self.make_request(b"POST", self.url, params1) session1 = channel1.json_body["session"] - channel2 = self.make_request(b"POST", self.url, json.dumps(params2)) + channel2 = self.make_request(b"POST", self.url, params2) session2 = channel2.json_body["session"] # Use token with both sessions @@ -471,18 +465,18 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): "token": token, "session": session1, } - self.make_request(b"POST", self.url, json.dumps(params1)) + self.make_request(b"POST", self.url, params1) params2["auth"] = { "type": LoginType.REGISTRATION_TOKEN, "token": token, "session": session2, } - self.make_request(b"POST", self.url, json.dumps(params2)) + self.make_request(b"POST", self.url, params2) # Complete registration with session1 params1["auth"]["type"] = LoginType.DUMMY - self.make_request(b"POST", self.url, json.dumps(params1)) + self.make_request(b"POST", self.url, params1) # Check `result` of registration token stage for session1 is `True` result1 = self.get_success( @@ -550,7 +544,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): # Do request without auth to get a session ID params: JsonDict = {"username": "kermit", "password": "monkey"} - channel = self.make_request(b"POST", self.url, json.dumps(params)) + channel = self.make_request(b"POST", self.url, params) session = channel.json_body["session"] # Use token @@ -559,7 +553,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): "token": token, "session": session, } - self.make_request(b"POST", self.url, json.dumps(params)) + self.make_request(b"POST", self.url, params) # Delete token self.get_success( @@ -827,8 +821,7 @@ class AccountValidityTestCase(unittest.HomeserverTestCase): admin_tok = self.login("admin", "adminpassword") url = "/_synapse/admin/v1/account_validity/validity" - params = {"user_id": user_id} - request_data = json.dumps(params) + request_data = {"user_id": user_id} channel = self.make_request(b"POST", url, request_data, access_token=admin_tok) self.assertEqual(channel.result["code"], b"200", channel.result) @@ -845,12 +838,11 @@ class AccountValidityTestCase(unittest.HomeserverTestCase): admin_tok = self.login("admin", "adminpassword") url = "/_synapse/admin/v1/account_validity/validity" - params = { + request_data = { "user_id": user_id, "expiration_ts": 0, "enable_renewal_emails": False, } - request_data = json.dumps(params) channel = self.make_request(b"POST", url, request_data, access_token=admin_tok) self.assertEqual(channel.result["code"], b"200", channel.result) @@ -870,12 +862,11 @@ class AccountValidityTestCase(unittest.HomeserverTestCase): admin_tok = self.login("admin", "adminpassword") url = "/_synapse/admin/v1/account_validity/validity" - params = { + request_data = { "user_id": user_id, "expiration_ts": 0, "enable_renewal_emails": False, } - request_data = json.dumps(params) channel = self.make_request(b"POST", url, request_data, access_token=admin_tok) self.assertEqual(channel.result["code"], b"200", channel.result) @@ -1041,16 +1032,14 @@ class AccountValidityRenewalByEmailTestCase(unittest.HomeserverTestCase): (user_id, tok) = self.create_user() - request_data = json.dumps( - { - "auth": { - "type": "m.login.password", - "user": user_id, - "password": "monkey", - }, - "erase": False, - } - ) + request_data = { + "auth": { + "type": 
"m.login.password", + "user": user_id, + "password": "monkey", + }, + "erase": False, + } channel = self.make_request( "POST", "account/deactivate", request_data, access_token=tok ) diff --git a/tests/rest/client/test_report_event.py b/tests/rest/client/test_report_event.py index 20a259fc43..ad0d0209f7 100644 --- a/tests/rest/client/test_report_event.py +++ b/tests/rest/client/test_report_event.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json - from twisted.test.proto_helpers import MemoryReactor import synapse.rest.admin @@ -77,10 +75,7 @@ class ReportEventTestCase(unittest.HomeserverTestCase): def _assert_status(self, response_status: int, data: JsonDict) -> None: channel = self.make_request( - "POST", - self.report_path, - json.dumps(data), - access_token=self.other_user_tok, + "POST", self.report_path, data, access_token=self.other_user_tok ) self.assertEqual( response_status, int(channel.result["code"]), msg=channel.result["body"] diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 4c6b3decd8..c60ca604e9 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -770,16 +770,14 @@ class RoomsCreateTestCase(RoomBase): # Build the request's content. We use local MXIDs because invites over federation # are more difficult to mock. - content = json.dumps( - { - "invite": [ - "@alice1:red", - "@alice2:red", - "@alice3:red", - "@alice4:red", - ] - } - ).encode("utf8") + content = { + "invite": [ + "@alice1:red", + "@alice2:red", + "@alice3:red", + "@alice4:red", + ] + } # Test that the invites are correctly ratelimited. channel = self.make_request("POST", "/createRoom", content) @@ -2251,8 +2249,7 @@ class PerRoomProfilesForbiddenTestCase(unittest.HomeserverTestCase): # Set a profile for the test user self.displayname = "test user" - data = {"displayname": self.displayname} - request_data = json.dumps(data) + request_data = {"displayname": self.displayname} channel = self.make_request( "PUT", "/_matrix/client/r0/profile/%s/displayname" % (self.user_id,), @@ -2264,8 +2261,7 @@ class PerRoomProfilesForbiddenTestCase(unittest.HomeserverTestCase): self.room_id = self.helper.create_room_as(self.user_id, tok=self.tok) def test_per_room_profile_forbidden(self) -> None: - data = {"membership": "join", "displayname": "other test user"} - request_data = json.dumps(data) + request_data = {"membership": "join", "displayname": "other test user"} channel = self.make_request( "PUT", "/_matrix/client/r0/rooms/%s/state/m.room.member/%s" @@ -2605,16 +2601,14 @@ class LabelsTestCase(unittest.HomeserverTestCase): def test_search_filter_labels(self) -> None: """Test that we can filter by a label on a /search request.""" - request_data = json.dumps( - { - "search_categories": { - "room_events": { - "search_term": "label", - "filter": self.FILTER_LABELS, - } + request_data = { + "search_categories": { + "room_events": { + "search_term": "label", + "filter": self.FILTER_LABELS, } } - ) + } self._send_labelled_messages_in_room() @@ -2642,16 +2636,14 @@ class LabelsTestCase(unittest.HomeserverTestCase): def test_search_filter_not_labels(self) -> None: """Test that we can filter by the absence of a label on a /search request.""" - request_data = json.dumps( - { - "search_categories": { - "room_events": { - "search_term": "label", - "filter": self.FILTER_NOT_LABELS, - } + request_data = { + "search_categories": { + "room_events": { + "search_term": "label", + "filter": 
self.FILTER_NOT_LABELS, } } - ) + } self._send_labelled_messages_in_room() @@ -2691,16 +2683,14 @@ class LabelsTestCase(unittest.HomeserverTestCase): """Test that we can filter by both a label and the absence of another label on a /search request. """ - request_data = json.dumps( - { - "search_categories": { - "room_events": { - "search_term": "label", - "filter": self.FILTER_LABELS_NOT_LABELS, - } + request_data = { + "search_categories": { + "room_events": { + "search_term": "label", + "filter": self.FILTER_LABELS_NOT_LABELS, } } - ) + } self._send_labelled_messages_in_room() @@ -3145,8 +3135,7 @@ class RoomAliasListTestCase(unittest.HomeserverTestCase): def _set_alias_via_directory(self, alias: str, expected_code: int = 200) -> None: url = "/_matrix/client/r0/directory/room/" + alias - data = {"room_id": self.room_id} - request_data = json.dumps(data) + request_data = {"room_id": self.room_id} channel = self.make_request( "PUT", url, request_data, access_token=self.room_owner_tok @@ -3175,8 +3164,7 @@ class RoomCanonicalAliasTestCase(unittest.HomeserverTestCase): def _set_alias_via_directory(self, alias: str, expected_code: int = 200) -> None: url = "/_matrix/client/r0/directory/room/" + alias - data = {"room_id": self.room_id} - request_data = json.dumps(data) + request_data = {"room_id": self.room_id} channel = self.make_request( "PUT", url, request_data, access_token=self.room_owner_tok @@ -3202,7 +3190,7 @@ class RoomCanonicalAliasTestCase(unittest.HomeserverTestCase): channel = self.make_request( "PUT", "rooms/%s/state/m.room.canonical_alias" % (self.room_id,), - json.dumps(content), + content, access_token=self.room_owner_tok, ) self.assertEqual(channel.code, expected_code, channel.result) diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index e3efd1f1b0..b085c50356 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -606,11 +606,10 @@ class UnreadMessagesTestCase(unittest.HomeserverTestCase): self._check_unread_count(1) # Send a read receipt to tell the server we've read the latest event. - body = json.dumps({ReceiptTypes.READ: res["event_id"]}).encode("utf8") channel = self.make_request( "POST", f"/rooms/{self.room_id}/read_markers", - body, + {ReceiptTypes.READ: res["event_id"]}, access_token=self.tok, ) self.assertEqual(channel.code, 200, channel.json_body) diff --git a/tests/rest/client/utils.py b/tests/rest/client/utils.py index 93f749744d..105d418698 100644 --- a/tests/rest/client/utils.py +++ b/tests/rest/client/utils.py @@ -136,7 +136,7 @@ class RestHelper: self.site, "POST", path, - json.dumps(content).encode("utf8"), + content, custom_headers=custom_headers, ) @@ -210,7 +210,7 @@ class RestHelper: self.site, "POST", path, - json.dumps(data).encode("utf8"), + data, ) assert ( @@ -309,7 +309,7 @@ class RestHelper: self.site, "PUT", path, - json.dumps(data).encode("utf8"), + data, ) assert ( @@ -392,7 +392,7 @@ class RestHelper: self.site, "PUT", path, - json.dumps(content or {}).encode("utf8"), + content or {}, custom_headers=custom_headers, ) diff --git a/tests/test_terms_auth.py b/tests/test_terms_auth.py index 37fada5c53..d3c13cf14c 100644 --- a/tests/test_terms_auth.py +++ b/tests/test_terms_auth.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import json from unittest.mock import Mock from twisted.test.proto_helpers import MemoryReactorClock @@ -51,7 +50,7 @@ class TermsTestCase(unittest.HomeserverTestCase): def test_ui_auth(self): # Do a UI auth request - request_data = json.dumps({"username": "kermit", "password": "monkey"}) + request_data = {"username": "kermit", "password": "monkey"} channel = self.make_request(b"POST", self.url, request_data) self.assertEqual(channel.result["code"], b"401", channel.result) @@ -82,16 +81,14 @@ class TermsTestCase(unittest.HomeserverTestCase): self.assertDictContainsSubset(channel.json_body["params"], expected_params) # We have to complete the dummy auth stage before completing the terms stage - request_data = json.dumps( - { - "username": "kermit", - "password": "monkey", - "auth": { - "session": channel.json_body["session"], - "type": "m.login.dummy", - }, - } - ) + request_data = { + "username": "kermit", + "password": "monkey", + "auth": { + "session": channel.json_body["session"], + "type": "m.login.dummy", + }, + } self.registration_handler.check_username = Mock(return_value=True) @@ -102,16 +99,14 @@ class TermsTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.result["code"], b"401", channel.result) # Finish the UI auth for terms - request_data = json.dumps( - { - "username": "kermit", - "password": "monkey", - "auth": { - "session": channel.json_body["session"], - "type": "m.login.terms", - }, - } - ) + request_data = { + "username": "kermit", + "password": "monkey", + "auth": { + "session": channel.json_body["session"], + "type": "m.login.terms", + }, + } channel = self.make_request(b"POST", self.url, request_data) # We're interested in getting a response that looks like a successful diff --git a/tests/unittest.py b/tests/unittest.py index 7b97a4bf6e..9f1ff774a8 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -16,7 +16,6 @@ import gc import hashlib import hmac -import json import logging import secrets import time @@ -619,20 +618,16 @@ class HomeserverTestCase(TestCase): want_mac.update(nonce.encode("ascii") + b"\x00" + nonce_str) want_mac_digest = want_mac.hexdigest() - body = json.dumps( - { - "nonce": nonce, - "username": username, - "displayname": displayname, - "password": password, - "admin": admin, - "mac": want_mac_digest, - "inhibit_login": True, - } - ) - channel = self.make_request( - "POST", "/_synapse/admin/v1/register", body.encode("utf8") - ) + body = { + "nonce": nonce, + "username": username, + "displayname": displayname, + "password": password, + "admin": admin, + "mac": want_mac_digest, + "inhibit_login": True, + } + channel = self.make_request("POST", "/_synapse/admin/v1/register", body) self.assertEqual(channel.code, 200, channel.json_body) user_id = channel.json_body["user_id"] @@ -676,9 +671,7 @@ class HomeserverTestCase(TestCase): custom_headers: Optional[Iterable[CustomHeaderType]] = None, ) -> str: """ - Log in a user, and get an access token. Requires the Login API be - registered. - + Log in a user, and get an access token. Requires the Login API be registered. 
""" body = {"type": "m.login.password", "user": username, "password": password} if device_id: @@ -687,7 +680,7 @@ class HomeserverTestCase(TestCase): channel = self.make_request( "POST", "/_matrix/client/r0/login", - json.dumps(body).encode("utf8"), + body, custom_headers=custom_headers, ) self.assertEqual(channel.code, 200, channel.result) From c6a05063ff26502d126be33dd4ae2854ea88cbab Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 18 Jul 2022 10:05:30 +0100 Subject: [PATCH 140/178] Don't pull out the full state when creating an event (#13281) --- changelog.d/13281.misc | 1 + synapse/events/builder.py | 8 +++++++- synapse/state/__init__.py | 3 ++- 3 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13281.misc diff --git a/changelog.d/13281.misc b/changelog.d/13281.misc new file mode 100644 index 0000000000..dea51d1362 --- /dev/null +++ b/changelog.d/13281.misc @@ -0,0 +1 @@ +Don't pull out the full state when creating an event. diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 4caf6cbdee..17f624b68f 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -24,9 +24,11 @@ from synapse.api.room_versions import ( RoomVersion, ) from synapse.crypto.event_signing import add_hashes_and_signatures +from synapse.event_auth import auth_types_for_event from synapse.events import EventBase, _EventInternalMetadata, make_event_from_dict from synapse.state import StateHandler from synapse.storage.databases.main import DataStore +from synapse.storage.state import StateFilter from synapse.types import EventID, JsonDict from synapse.util import Clock from synapse.util.stringutils import random_string @@ -121,7 +123,11 @@ class EventBuilder: """ if auth_event_ids is None: state_ids = await self._state.compute_state_after_events( - self.room_id, prev_event_ids + self.room_id, + prev_event_ids, + state_filter=StateFilter.from_types( + auth_types_for_event(self.room_version, self) + ), ) auth_event_ids = self._event_auth_handler.compute_auth_events( self, state_ids diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index fcb7e829d4..e3faa52cd6 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -157,6 +157,7 @@ class StateHandler: self, room_id: str, event_ids: Collection[str], + state_filter: Optional[StateFilter] = None, ) -> StateMap[str]: """Fetch the state after each of the given event IDs. Resolve them and return. @@ -174,7 +175,7 @@ class StateHandler: """ logger.debug("calling resolve_state_groups from compute_state_after_events") ret = await self.resolve_state_groups_for_events(room_id, event_ids) - return await ret.get_state(self._state_storage_controller, StateFilter.all()) + return await ret.get_state(self._state_storage_controller, state_filter) async def get_current_users_in_room( self, room_id: str, latest_event_ids: List[str] From c5f487b7cb7994c1782bf639223940f19b18e4d9 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 18 Jul 2022 13:02:25 +0100 Subject: [PATCH 141/178] Update expected DB query count when creating a room (#13307) --- changelog.d/13307.misc | 1 + tests/rest/client/test_rooms.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13307.misc diff --git a/changelog.d/13307.misc b/changelog.d/13307.misc new file mode 100644 index 0000000000..45b628ce13 --- /dev/null +++ b/changelog.d/13307.misc @@ -0,0 +1 @@ +Don't pull out the full state when creating an event. 
\ No newline at end of file diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index c60ca604e9..17571b2d33 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -710,7 +710,7 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(HTTPStatus.OK, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(36, channel.resource_usage.db_txn_count) + self.assertEqual(43, channel.resource_usage.db_txn_count) def test_post_room_initial_state(self) -> None: # POST with initial_state config key, expect new room id @@ -723,7 +723,7 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(HTTPStatus.OK, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(40, channel.resource_usage.db_txn_count) + self.assertEqual(49, channel.resource_usage.db_txn_count) def test_post_room_visibility_key(self) -> None: # POST with visibility config key, expect new room id From 6785b0f39d8f920a7b91a8a6a043ede08eb277e4 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Mon, 18 Jul 2022 15:17:24 +0200 Subject: [PATCH 142/178] Use READ COMMITTED isolation level when purging rooms (#12942) To close: #10294. Signed off by Nick @ Beeper. --- changelog.d/12942.misc | 1 + .../storage/databases/main/purge_events.py | 33 +++++++++++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 changelog.d/12942.misc diff --git a/changelog.d/12942.misc b/changelog.d/12942.misc new file mode 100644 index 0000000000..acb2558d57 --- /dev/null +++ b/changelog.d/12942.misc @@ -0,0 +1 @@ +Use lower isolation level when purging rooms to avoid serialization errors. Contributed by Nick @ Beeper. diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py index 549ce07c16..6d42276503 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py @@ -19,6 +19,8 @@ from synapse.api.errors import SynapseError from synapse.storage.database import LoggingTransaction from synapse.storage.databases.main import CacheInvalidationWorkerStore from synapse.storage.databases.main.state import StateGroupWorkerStore +from synapse.storage.engines import PostgresEngine +from synapse.storage.engines._base import IsolationLevel from synapse.types import RoomStreamToken logger = logging.getLogger(__name__) @@ -317,11 +319,38 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore): Returns: The list of state groups to delete. """ - return await self.db_pool.runInteraction( - "purge_room", self._purge_room_txn, room_id + + # This first runs the purge transaction with READ_COMMITTED isolation level, + # meaning any new rows in the tables will not trigger a serialization error. + # We then run the same purge a second time without this isolation level to + # purge any of those rows which were added during the first. 
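The comment above describes the core of the change: the first purge pass runs under a weaker isolation level so that rows written concurrently by event persistence do not abort it, and a second pass then sweeps up anything that slipped in during the first. As a rough sketch of what that isolation level means at the database layer — using psycopg2 against a hypothetical local PostgreSQL database, where the connection string and direct table access are assumptions made purely for illustration, not anything Synapse does verbatim:

```python
# Illustrative sketch only: READ COMMITTED plus a room-level row lock,
# assuming a local PostgreSQL database reachable via the DSN below.
import psycopg2
from psycopg2 import extensions

conn = psycopg2.connect("dbname=synapse user=synapse")
conn.set_session(isolation_level=extensions.ISOLATION_LEVEL_READ_COMMITTED)

room_id = "!abcdefg:example.org"
with conn, conn.cursor() as cur:
    # Lock the room row first so concurrent event persistence for this room
    # has to wait, mirroring the `SELECT ... FOR UPDATE` added by the patch.
    cur.execute(
        "SELECT room_version FROM rooms WHERE room_id = %s FOR UPDATE", (room_id,)
    )
    # Under READ COMMITTED each statement sees rows committed by other
    # transactions in the meantime, so concurrent inserts into the purged
    # tables do not raise serialization errors that abort the whole purge.
    cur.execute("DELETE FROM events WHERE room_id = %s", (room_id,))
conn.close()
```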
+ + state_groups_to_delete = await self.db_pool.runInteraction( + "purge_room", + self._purge_room_txn, + room_id=room_id, + isolation_level=IsolationLevel.READ_COMMITTED, ) + state_groups_to_delete.extend( + await self.db_pool.runInteraction( + "purge_room", + self._purge_room_txn, + room_id=room_id, + ), + ) + + return state_groups_to_delete + def _purge_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[int]: + # This collides with event persistence so we cannot write new events and metadata into + # a room while deleting it or this transaction will fail. + if isinstance(self.database_engine, PostgresEngine): + txn.execute( + "SELECT room_version FROM rooms WHERE room_id = ? FOR UPDATE", + (room_id,), + ) + # First, fetch all the state groups that should be deleted, before # we delete that information. txn.execute( From cf5fa5063d57e9fde96d666e9152bdda520431b8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 18 Jul 2022 14:19:11 +0100 Subject: [PATCH 143/178] Don't pull out full state when sending dummy events (#13310) --- changelog.d/13310.misc | 1 + synapse/handlers/message.py | 8 +------- 2 files changed, 2 insertions(+), 7 deletions(-) create mode 100644 changelog.d/13310.misc diff --git a/changelog.d/13310.misc b/changelog.d/13310.misc new file mode 100644 index 0000000000..eaf570e058 --- /dev/null +++ b/changelog.d/13310.misc @@ -0,0 +1 @@ +Reduce memory usage of sending dummy events. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index b5fede9496..85abe71ea8 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1849,13 +1849,8 @@ class EventCreationHandler: # For each room we need to find a joined member we can use to send # the dummy event with. - latest_event_ids = await self.store.get_prev_events_for_room(room_id) - members = await self.state.get_current_users_in_room( - room_id, latest_event_ids=latest_event_ids - ) + members = await self.store.get_local_users_in_room(room_id) for user_id in members: - if not self.hs.is_mine_id(user_id): - continue requester = create_requester(user_id, authenticated_entity=self.server_name) try: event, context = await self.create_event( @@ -1866,7 +1861,6 @@ class EventCreationHandler: "room_id": room_id, "sender": user_id, }, - prev_event_ids=latest_event_ids, ) event.internal_metadata.proactively_send = False From f721f1baba9cdefc0bff540c3b93710b36eecee9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 18 Jul 2022 14:28:14 +0100 Subject: [PATCH 144/178] Revert "Make all `process_replication_rows` methods async (#13304)" (#13312) This reverts commit 5d4028f217f178fcd384d5bfddd92225b4e78c51. 
--- changelog.d/13304.misc | 1 - synapse/handlers/typing.py | 4 ++-- synapse/replication/slave/storage/devices.py | 6 ++---- synapse/replication/slave/storage/push_rule.py | 6 ++---- synapse/replication/slave/storage/pushers.py | 6 ++---- synapse/replication/tcp/client.py | 6 ++---- synapse/storage/_base.py | 2 +- synapse/storage/databases/main/account_data.py | 4 ++-- synapse/storage/databases/main/cache.py | 4 ++-- synapse/storage/databases/main/deviceinbox.py | 6 ++---- synapse/storage/databases/main/events_worker.py | 4 ++-- synapse/storage/databases/main/presence.py | 6 ++---- synapse/storage/databases/main/receipts.py | 6 ++---- synapse/storage/databases/main/tags.py | 4 ++-- 14 files changed, 25 insertions(+), 40 deletions(-) delete mode 100644 changelog.d/13304.misc diff --git a/changelog.d/13304.misc b/changelog.d/13304.misc deleted file mode 100644 index 156d3d71d7..0000000000 --- a/changelog.d/13304.misc +++ /dev/null @@ -1 +0,0 @@ -Make all replication row processing methods asynchronous. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index 26edeeca3c..d104ea07fe 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -158,7 +158,7 @@ class FollowerTypingHandler: except Exception: logger.exception("Error pushing typing notif to remotes") - async def process_replication_rows( + def process_replication_rows( self, token: int, rows: List[TypingStream.TypingStreamRow] ) -> None: """Should be called whenever we receive updates for typing stream.""" @@ -444,7 +444,7 @@ class TypingWriterHandler(FollowerTypingHandler): return rows, current_id, limited - async def process_replication_rows( + def process_replication_rows( self, token: int, rows: List[TypingStream.TypingStreamRow] ) -> None: # The writing process should never get updates from replication. 
diff --git a/synapse/replication/slave/storage/devices.py b/synapse/replication/slave/storage/devices.py index 22f7999721..a48cc02069 100644 --- a/synapse/replication/slave/storage/devices.py +++ b/synapse/replication/slave/storage/devices.py @@ -49,7 +49,7 @@ class SlavedDeviceStore(DeviceWorkerStore, BaseSlavedStore): def get_device_stream_token(self) -> int: return self._device_list_id_gen.get_current_token() - async def process_replication_rows( + def process_replication_rows( self, stream_name: str, instance_name: str, token: int, rows: Iterable[Any] ) -> None: if stream_name == DeviceListsStream.NAME: @@ -59,9 +59,7 @@ class SlavedDeviceStore(DeviceWorkerStore, BaseSlavedStore): self._device_list_id_gen.advance(instance_name, token) for row in rows: self._user_signature_stream_cache.entity_has_changed(row.user_id, token) - return await super().process_replication_rows( - stream_name, instance_name, token, rows - ) + return super().process_replication_rows(stream_name, instance_name, token, rows) def _invalidate_caches_for_devices( self, token: int, rows: Iterable[DeviceListsStream.DeviceListsStreamRow] diff --git a/synapse/replication/slave/storage/push_rule.py b/synapse/replication/slave/storage/push_rule.py index e1838a81a9..52ee3f7e58 100644 --- a/synapse/replication/slave/storage/push_rule.py +++ b/synapse/replication/slave/storage/push_rule.py @@ -24,7 +24,7 @@ class SlavedPushRuleStore(SlavedEventStore, PushRulesWorkerStore): def get_max_push_rules_stream_id(self) -> int: return self._push_rules_stream_id_gen.get_current_token() - async def process_replication_rows( + def process_replication_rows( self, stream_name: str, instance_name: str, token: int, rows: Iterable[Any] ) -> None: if stream_name == PushRulesStream.NAME: @@ -33,6 +33,4 @@ class SlavedPushRuleStore(SlavedEventStore, PushRulesWorkerStore): self.get_push_rules_for_user.invalidate((row.user_id,)) self.get_push_rules_enabled_for_user.invalidate((row.user_id,)) self.push_rules_stream_cache.entity_has_changed(row.user_id, token) - return await super().process_replication_rows( - stream_name, instance_name, token, rows - ) + return super().process_replication_rows(stream_name, instance_name, token, rows) diff --git a/synapse/replication/slave/storage/pushers.py b/synapse/replication/slave/storage/pushers.py index fb3f5653af..de642bba71 100644 --- a/synapse/replication/slave/storage/pushers.py +++ b/synapse/replication/slave/storage/pushers.py @@ -40,11 +40,9 @@ class SlavedPusherStore(PusherWorkerStore, BaseSlavedStore): def get_pushers_stream_token(self) -> int: return self._pushers_id_gen.get_current_token() - async def process_replication_rows( + def process_replication_rows( self, stream_name: str, instance_name: str, token: int, rows: Iterable[Any] ) -> None: if stream_name == PushersStream.NAME: self._pushers_id_gen.advance(instance_name, token) - return await super().process_replication_rows( - stream_name, instance_name, token, rows - ) + return super().process_replication_rows(stream_name, instance_name, token, rows) diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index f9722ccb4f..2f59245058 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -144,15 +144,13 @@ class ReplicationDataHandler: token: stream token for this batch of rows rows: a list of Stream.ROW_TYPE objects as returned by Stream.parse_row. 
""" - await self.store.process_replication_rows( - stream_name, instance_name, token, rows - ) + self.store.process_replication_rows(stream_name, instance_name, token, rows) if self.send_handler: await self.send_handler.process_replication_rows(stream_name, token, rows) if stream_name == TypingStream.NAME: - await self._typing_handler.process_replication_rows(token, rows) + self._typing_handler.process_replication_rows(token, rows) self.notifier.on_new_event( StreamKeyType.TYPING, token, rooms=[row.room_id for row in rows] ) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 822108e83b..b8c8dcd76b 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -47,7 +47,7 @@ class SQLBaseStore(metaclass=ABCMeta): self.database_engine = database.engine self.db_pool = database - async def process_replication_rows( + def process_replication_rows( self, stream_name: str, instance_name: str, diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py index 337b22294e..9af9f4f18e 100644 --- a/synapse/storage/databases/main/account_data.py +++ b/synapse/storage/databases/main/account_data.py @@ -414,7 +414,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) ) ) - async def process_replication_rows( + def process_replication_rows( self, stream_name: str, instance_name: str, @@ -437,7 +437,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) ) self._account_data_stream_cache.entity_has_changed(row.user_id, token) - await super().process_replication_rows(stream_name, instance_name, token, rows) + super().process_replication_rows(stream_name, instance_name, token, rows) async def add_account_data_to_room( self, user_id: str, room_id: str, account_data_type: str, content: JsonDict diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index 048ff3e1b7..2367ddeea3 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -119,7 +119,7 @@ class CacheInvalidationWorkerStore(SQLBaseStore): "get_all_updated_caches", get_all_updated_caches_txn ) - async def process_replication_rows( + def process_replication_rows( self, stream_name: str, instance_name: str, token: int, rows: Iterable[Any] ) -> None: if stream_name == EventsStream.NAME: @@ -154,7 +154,7 @@ class CacheInvalidationWorkerStore(SQLBaseStore): else: self._attempt_to_invalidate_cache(row.cache_func, row.keys) - await super().process_replication_rows(stream_name, instance_name, token, rows) + super().process_replication_rows(stream_name, instance_name, token, rows) def _process_event_stream_row(self, token: int, row: EventsStreamRow) -> None: data = row.data diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index 45fe58c104..422e0e65ca 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py @@ -128,7 +128,7 @@ class DeviceInboxWorkerStore(SQLBaseStore): prefilled_cache=device_outbox_prefill, ) - async def process_replication_rows( + def process_replication_rows( self, stream_name: str, instance_name: str, @@ -148,9 +148,7 @@ class DeviceInboxWorkerStore(SQLBaseStore): self._device_federation_outbox_stream_cache.entity_has_changed( row.entity, token ) - return await super().process_replication_rows( - stream_name, instance_name, token, rows - ) + return super().process_replication_rows(stream_name, instance_name, 
token, rows) def get_to_device_stream_token(self) -> int: return self._device_inbox_id_gen.get_current_token() diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 5310d4eda2..f3935bfead 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -280,7 +280,7 @@ class EventsWorkerStore(SQLBaseStore): id_column="chain_id", ) - async def process_replication_rows( + def process_replication_rows( self, stream_name: str, instance_name: str, @@ -292,7 +292,7 @@ class EventsWorkerStore(SQLBaseStore): elif stream_name == BackfillStream.NAME: self._backfill_id_gen.advance(instance_name, -token) - await super().process_replication_rows(stream_name, instance_name, token, rows) + super().process_replication_rows(stream_name, instance_name, token, rows) async def have_censored_event(self, event_id: str) -> bool: """Check if an event has been censored, i.e. if the content of the event has been erased diff --git a/synapse/storage/databases/main/presence.py b/synapse/storage/databases/main/presence.py index 9fe3124b35..9769a18a9d 100644 --- a/synapse/storage/databases/main/presence.py +++ b/synapse/storage/databases/main/presence.py @@ -431,7 +431,7 @@ class PresenceStore(PresenceBackgroundUpdateStore, CacheInvalidationWorkerStore) self._presence_on_startup = [] return active_on_startup - async def process_replication_rows( + def process_replication_rows( self, stream_name: str, instance_name: str, @@ -443,6 +443,4 @@ class PresenceStore(PresenceBackgroundUpdateStore, CacheInvalidationWorkerStore) for row in rows: self.presence_stream_cache.entity_has_changed(row.user_id, token) self._get_presence_for_user.invalidate((row.user_id,)) - return await super().process_replication_rows( - stream_name, instance_name, token, rows - ) + return super().process_replication_rows(stream_name, instance_name, token, rows) diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index f85862d968..0090c9f225 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -589,7 +589,7 @@ class ReceiptsWorkerStore(SQLBaseStore): "get_unread_event_push_actions_by_room_for_user", (room_id,) ) - async def process_replication_rows( + def process_replication_rows( self, stream_name: str, instance_name: str, @@ -604,9 +604,7 @@ class ReceiptsWorkerStore(SQLBaseStore): ) self._receipts_stream_cache.entity_has_changed(row.room_id, token) - return await super().process_replication_rows( - stream_name, instance_name, token, rows - ) + return super().process_replication_rows(stream_name, instance_name, token, rows) def _insert_linearized_receipt_txn( self, diff --git a/synapse/storage/databases/main/tags.py b/synapse/storage/databases/main/tags.py index 5e8905369c..b0f5de67a3 100644 --- a/synapse/storage/databases/main/tags.py +++ b/synapse/storage/databases/main/tags.py @@ -292,7 +292,7 @@ class TagsWorkerStore(AccountDataWorkerStore): # than the id that the client has. 
pass - async def process_replication_rows( + def process_replication_rows( self, stream_name: str, instance_name: str, @@ -305,7 +305,7 @@ class TagsWorkerStore(AccountDataWorkerStore): self.get_tags_for_user.invalidate((row.user_id,)) self._account_data_stream_cache.entity_has_changed(row.user_id, token) - await super().process_replication_rows(stream_name, instance_name, token, rows) + super().process_replication_rows(stream_name, instance_name, token, rows) class TagsStore(TagsWorkerStore): From bb25dd81e3d909299092fce7adda05cc6f35dee2 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 18 Jul 2022 15:02:32 +0100 Subject: [PATCH 145/178] Prevent #3679 from appearing in blame results (#13311) --- .git-blame-ignore-revs | 13 +++++++++++++ changelog.d/13311.misc | 1 + 2 files changed, 14 insertions(+) create mode 100644 changelog.d/13311.misc diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 50d28c68ee..c3638c35eb 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -1,3 +1,16 @@ +# Commits in this file will be removed from GitHub blame results. +# +# To use this file locally, use: +# git blame --ignore-revs-file="path/to/.git-blame-ignore-revs" +# +# or configure the `blame.ignoreRevsFile` option in your git config. +# +# If ignoring a pull request that was not squash merged, only the merge +# commit needs to be put here. Child commits will be resolved from it. + +# Run black (#3679). +8b3d9b6b199abb87246f982d5db356f1966db925 + # Black reformatting (#5482). 32e7c9e7f20b57dd081023ac42d6931a8da9b3a3 diff --git a/changelog.d/13311.misc b/changelog.d/13311.misc new file mode 100644 index 0000000000..4be81c675c --- /dev/null +++ b/changelog.d/13311.misc @@ -0,0 +1 @@ +Prevent formatting changes of [#3679](https://github.com/matrix-org/synapse/pull/3679) from appearing in `git blame`. \ No newline at end of file From 8c60c572f011a502040b509ae648fd5cad3d4428 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 18 Jul 2022 17:30:59 +0200 Subject: [PATCH 146/178] Up the dependency on canonicaljson to ^1.5.0 (#13172) Co-authored-by: David Robertson --- changelog.d/13172.misc | 1 + poetry.lock | 2 +- pyproject.toml | 4 +++- 3 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13172.misc diff --git a/changelog.d/13172.misc b/changelog.d/13172.misc new file mode 100644 index 0000000000..124a1b3662 --- /dev/null +++ b/changelog.d/13172.misc @@ -0,0 +1 @@ +Always use a version of canonicaljson that supports the C implementation of frozendict. diff --git a/poetry.lock b/poetry.lock index 3a08c9478d..41ab40edd1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1563,7 +1563,7 @@ url_preview = ["lxml"] [metadata] lock-version = "1.1" python-versions = "^3.7.1" -content-hash = "e96625923122e29b6ea5964379828e321b6cede2b020fc32c6f86c09d86d1ae8" +content-hash = "c24bbcee7e86dbbe7cdbf49f91a25b310bf21095452641e7440129f59b077f78" [metadata.files] attrs = [ diff --git a/pyproject.toml b/pyproject.toml index f77c02ca27..21bea2ba01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,7 +110,9 @@ jsonschema = ">=3.0.0" frozendict = ">=1,!=2.1.2" # We require 2.1.0 or higher for type hints. 
Previous guard was >= 1.1.0 unpaddedbase64 = ">=2.1.0" -canonicaljson = "^1.4.0" +# We require 1.5.0 to work around an issue when running against the C implementation of +# frozendict: https://github.com/matrix-org/python-canonicaljson/issues/36 +canonicaljson = "^1.5.0" # we use the type definitions added in signedjson 1.1. signedjson = "^1.1.0" # validating SSL certs for IP addresses requires service_identity 18.1. From 5526f9fc4f53c9e0eac6aa610bf0a76906b772dc Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Mon, 18 Jul 2022 17:39:39 +0100 Subject: [PATCH 147/178] Fix overcounting of pushers when they are replaced (#13296) Signed-off-by: Sean Quah --- changelog.d/13296.bugfix | 1 + synapse/push/pusherpool.py | 27 ++++++++++++++++----------- 2 files changed, 17 insertions(+), 11 deletions(-) create mode 100644 changelog.d/13296.bugfix diff --git a/changelog.d/13296.bugfix b/changelog.d/13296.bugfix new file mode 100644 index 0000000000..ff0eb2b4a1 --- /dev/null +++ b/changelog.d/13296.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in v1.18.0 where the `synapse_pushers` metric would overcount pushers when they are replaced. diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py index d0cc657b44..1e0ef44fc7 100644 --- a/synapse/push/pusherpool.py +++ b/synapse/push/pusherpool.py @@ -328,7 +328,7 @@ class PusherPool: return None try: - p = self.pusher_factory.create_pusher(pusher_config) + pusher = self.pusher_factory.create_pusher(pusher_config) except PusherConfigException as e: logger.warning( "Pusher incorrectly configured id=%i, user=%s, appid=%s, pushkey=%s: %s", @@ -346,23 +346,28 @@ class PusherPool: ) return None - if not p: + if not pusher: return None - appid_pushkey = "%s:%s" % (pusher_config.app_id, pusher_config.pushkey) + appid_pushkey = "%s:%s" % (pusher.app_id, pusher.pushkey) - byuser = self.pushers.setdefault(pusher_config.user_name, {}) + byuser = self.pushers.setdefault(pusher.user_id, {}) if appid_pushkey in byuser: - byuser[appid_pushkey].on_stop() - byuser[appid_pushkey] = p + previous_pusher = byuser[appid_pushkey] + previous_pusher.on_stop() - synapse_pushers.labels(type(p).__name__, p.app_id).inc() + synapse_pushers.labels( + type(previous_pusher).__name__, previous_pusher.app_id + ).dec() + byuser[appid_pushkey] = pusher + + synapse_pushers.labels(type(pusher).__name__, pusher.app_id).inc() # Check if there *may* be push to process. We do this as this check is a # lot cheaper to do than actually fetching the exact rows we need to # push. - user_id = pusher_config.user_name - last_stream_ordering = pusher_config.last_stream_ordering + user_id = pusher.user_id + last_stream_ordering = pusher.last_stream_ordering if last_stream_ordering: have_notifs = await self.store.get_if_maybe_push_in_range_for_user( user_id, last_stream_ordering @@ -372,9 +377,9 @@ class PusherPool: # risk missing push. 
have_notifs = True - p.on_started(have_notifs) + pusher.on_started(have_notifs) - return p + return pusher async def remove_pusher(self, app_id: str, pushkey: str, user_id: str) -> None: appid_pushkey = "%s:%s" % (app_id, pushkey) From 15edf23626a55ee38a8ee394bfcc8eaeff733c37 Mon Sep 17 00:00:00 2001 From: Shay Date: Mon, 18 Jul 2022 12:35:45 -0700 Subject: [PATCH 148/178] Improve performance of query ` _get_subset_users_in_room_with_profiles` (#13299) --- changelog.d/13299.misc | 1 + synapse/storage/databases/main/roommember.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13299.misc diff --git a/changelog.d/13299.misc b/changelog.d/13299.misc new file mode 100644 index 0000000000..a9d5566873 --- /dev/null +++ b/changelog.d/13299.misc @@ -0,0 +1 @@ +Improve performance of query `_get_subset_users_in_room_with_profiles`. diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 105a518677..46ab6232d4 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -243,7 +243,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): txn: LoggingTransaction, ) -> Dict[str, ProfileInfo]: clause, ids = make_in_list_sql_clause( - self.database_engine, "m.user_id", user_ids + self.database_engine, "c.state_key", user_ids ) sql = """ From 7864f33e286dec22368dc0b11c06eebb1462a51e Mon Sep 17 00:00:00 2001 From: Shay Date: Mon, 18 Jul 2022 13:15:23 -0700 Subject: [PATCH 149/178] Increase batch size of `bulk_get_push_rules` and `_get_joined_profiles_from_event_ids`. (#13300) --- changelog.d/13300.misc | 1 + synapse/storage/databases/main/push_rule.py | 1 + synapse/storage/databases/main/roommember.py | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13300.misc diff --git a/changelog.d/13300.misc b/changelog.d/13300.misc new file mode 100644 index 0000000000..ee58add3c4 --- /dev/null +++ b/changelog.d/13300.misc @@ -0,0 +1 @@ +Up batch size of `bulk_get_push_rules` and `_get_joined_profiles_from_event_ids`. diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index 86649c1e6c..768f95d16c 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -228,6 +228,7 @@ class PushRulesWorkerStore( iterable=user_ids, retcols=("*",), desc="bulk_get_push_rules", + batch_size=1000, ) rows.sort(key=lambda row: (-int(row["priority_class"]), -int(row["priority"]))) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 46ab6232d4..df6b82660e 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -904,7 +904,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): iterable=event_ids, retcols=("user_id", "display_name", "avatar_url", "event_id"), keyvalues={"membership": Membership.JOIN}, - batch_size=500, + batch_size=1000, desc="_get_joined_profiles_from_event_ids", ) From 2ee0b6ef4b78bada535beb30301cf0e01cbb7d81 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Tue, 19 Jul 2022 13:25:29 +0200 Subject: [PATCH 150/178] Safe async event cache (#13308) Fix race conditions in the async cache invalidation logic, by separating the async & local invalidation calls and ensuring any async call i executed first. Signed off by Nick @ Beeper (@Fizzadar). 
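The ordering called out in this commit message is the heart of the fix: the asynchronous invalidation (which may reach an external cache shared across workers, for example Redis) must complete before the local in-memory entry is dropped, otherwise a concurrent read could repopulate the local cache from the still-stale external value. A minimal, self-contained sketch of that ordering (the class and names here are illustrative only and are not part of this patch):

```python
import asyncio


class TwoTierCache:
    """Toy stand-in for an in-memory cache backed by an external (cross-worker) cache."""

    def __init__(self) -> None:
        self.local: dict = {}   # in-process cache
        self.remote: dict = {}  # stands in for an external cache such as Redis

    async def invalidate(self, key: str) -> None:
        # 1) Drop the shared/remote entry first (pretend this await is a network call)...
        await asyncio.sleep(0)
        self.remote.pop(key, None)
        # 2) ...then drop the local copy. Doing the local pop first would leave a
        #    window in which a concurrent reader could refill the local cache
        #    from the still-stale remote value.
        self.local.pop(key, None)


async def main() -> None:
    cache = TwoTierCache()
    cache.local["$event"] = cache.remote["$event"] = "stale-event"
    await cache.invalidate("$event")
    print(cache.local, cache.remote)  # {} {}


asyncio.run(main())
```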
--- changelog.d/13308.misc | 1 + synapse/storage/_base.py | 9 +++- synapse/storage/database.py | 54 ++++++++++++++++--- .../storage/databases/main/censor_events.py | 2 +- synapse/storage/databases/main/events.py | 6 +-- .../storage/databases/main/events_worker.py | 48 +++++++++++++---- .../databases/main/monthly_active_users.py | 1 + .../storage/databases/main/purge_events.py | 2 +- 8 files changed, 102 insertions(+), 21 deletions(-) create mode 100644 changelog.d/13308.misc diff --git a/changelog.d/13308.misc b/changelog.d/13308.misc new file mode 100644 index 0000000000..7f8ec0815f --- /dev/null +++ b/changelog.d/13308.misc @@ -0,0 +1 @@ +Use an asynchronous cache wrapper for the get event cache. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index b8c8dcd76b..a2f8310388 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -96,6 +96,10 @@ class SQLBaseStore(metaclass=ABCMeta): cache doesn't exist. Mainly used for invalidating caches on workers, where they may not have the cache. + Note that this function does not invalidate any remote caches, only the + local in-memory ones. Any remote invalidation must be performed before + calling this. + Args: cache_name key: Entry to invalidate. If None then invalidates the entire @@ -112,7 +116,10 @@ class SQLBaseStore(metaclass=ABCMeta): if key is None: cache.invalidate_all() else: - cache.invalidate(tuple(key)) + # Prefer any local-only invalidation method. Invalidating any non-local + # cache must be be done before this. + invalidate_method = getattr(cache, "invalidate_local", cache.invalidate) + invalidate_method(tuple(key)) def db_to_json(db_content: Union[memoryview, bytes, bytearray, str]) -> Any: diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 6a6d0dcd73..ea672ff89e 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -23,6 +23,7 @@ from time import monotonic as monotonic_time from typing import ( TYPE_CHECKING, Any, + Awaitable, Callable, Collection, Dict, @@ -57,7 +58,7 @@ from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.background_updates import BackgroundUpdater from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine from synapse.storage.types import Connection, Cursor -from synapse.util.async_helpers import delay_cancellation, maybe_awaitable +from synapse.util.async_helpers import delay_cancellation from synapse.util.iterutils import batch_iter if TYPE_CHECKING: @@ -168,6 +169,7 @@ class LoggingDatabaseConnection: *, txn_name: Optional[str] = None, after_callbacks: Optional[List["_CallbackListEntry"]] = None, + async_after_callbacks: Optional[List["_AsyncCallbackListEntry"]] = None, exception_callbacks: Optional[List["_CallbackListEntry"]] = None, ) -> "LoggingTransaction": if not txn_name: @@ -178,6 +180,7 @@ class LoggingDatabaseConnection: name=txn_name, database_engine=self.engine, after_callbacks=after_callbacks, + async_after_callbacks=async_after_callbacks, exception_callbacks=exception_callbacks, ) @@ -209,6 +212,9 @@ class LoggingDatabaseConnection: # The type of entry which goes on our after_callbacks and exception_callbacks lists. 
_CallbackListEntry = Tuple[Callable[..., object], Tuple[object, ...], Dict[str, object]] +_AsyncCallbackListEntry = Tuple[ + Callable[..., Awaitable], Tuple[object, ...], Dict[str, object] +] P = ParamSpec("P") R = TypeVar("R") @@ -227,6 +233,10 @@ class LoggingTransaction: that have been added by `call_after` which should be run on successful completion of the transaction. None indicates that no callbacks should be allowed to be scheduled to run. + async_after_callbacks: A list that asynchronous callbacks will be appended + to by `async_call_after` which should run, before after_callbacks, on + successful completion of the transaction. None indicates that no + callbacks should be allowed to be scheduled to run. exception_callbacks: A list that callbacks will be appended to that have been added by `call_on_exception` which should be run if transaction ends with an error. None indicates that no callbacks @@ -238,6 +248,7 @@ class LoggingTransaction: "name", "database_engine", "after_callbacks", + "async_after_callbacks", "exception_callbacks", ] @@ -247,12 +258,14 @@ class LoggingTransaction: name: str, database_engine: BaseDatabaseEngine, after_callbacks: Optional[List[_CallbackListEntry]] = None, + async_after_callbacks: Optional[List[_AsyncCallbackListEntry]] = None, exception_callbacks: Optional[List[_CallbackListEntry]] = None, ): self.txn = txn self.name = name self.database_engine = database_engine self.after_callbacks = after_callbacks + self.async_after_callbacks = async_after_callbacks self.exception_callbacks = exception_callbacks def call_after( @@ -277,6 +290,28 @@ class LoggingTransaction: # type-ignore: need mypy containing https://github.com/python/mypy/pull/12668 self.after_callbacks.append((callback, args, kwargs)) # type: ignore[arg-type] + def async_call_after( + self, callback: Callable[P, Awaitable], *args: P.args, **kwargs: P.kwargs + ) -> None: + """Call the given asynchronous callback on the main twisted thread after + the transaction has finished (but before those added in `call_after`). + + Mostly used to invalidate remote caches after transactions. + + Note that transactions may be retried a few times if they encounter database + errors such as serialization failures. Callbacks given to `async_call_after` + will accumulate across transaction attempts and will _all_ be called once a + transaction attempt succeeds, regardless of whether previous transaction + attempts failed. Otherwise, if all transaction attempts fail, all + `call_on_exception` callbacks will be run instead. + """ + # if self.async_after_callbacks is None, that means that whatever constructed the + # LoggingTransaction isn't expecting there to be any callbacks; assert that + # is not the case. 
+ assert self.async_after_callbacks is not None + # type-ignore: need mypy containing https://github.com/python/mypy/pull/12668 + self.async_after_callbacks.append((callback, args, kwargs)) # type: ignore[arg-type] + def call_on_exception( self, callback: Callable[P, object], *args: P.args, **kwargs: P.kwargs ) -> None: @@ -574,6 +609,7 @@ class DatabasePool: conn: LoggingDatabaseConnection, desc: str, after_callbacks: List[_CallbackListEntry], + async_after_callbacks: List[_AsyncCallbackListEntry], exception_callbacks: List[_CallbackListEntry], func: Callable[Concatenate[LoggingTransaction, P], R], *args: P.args, @@ -597,6 +633,7 @@ class DatabasePool: conn desc after_callbacks + async_after_callbacks exception_callbacks func *args @@ -659,6 +696,7 @@ class DatabasePool: cursor = conn.cursor( txn_name=name, after_callbacks=after_callbacks, + async_after_callbacks=async_after_callbacks, exception_callbacks=exception_callbacks, ) try: @@ -798,6 +836,7 @@ class DatabasePool: async def _runInteraction() -> R: after_callbacks: List[_CallbackListEntry] = [] + async_after_callbacks: List[_AsyncCallbackListEntry] = [] exception_callbacks: List[_CallbackListEntry] = [] if not current_context(): @@ -809,6 +848,7 @@ class DatabasePool: self.new_transaction, desc, after_callbacks, + async_after_callbacks, exception_callbacks, func, *args, @@ -817,15 +857,17 @@ class DatabasePool: **kwargs, ) + # We order these assuming that async functions call out to external + # systems (e.g. to invalidate a cache) and the sync functions make these + # changes on any local in-memory caches/similar, and thus must be second. + for async_callback, async_args, async_kwargs in async_after_callbacks: + await async_callback(*async_args, **async_kwargs) for after_callback, after_args, after_kwargs in after_callbacks: - await maybe_awaitable(after_callback(*after_args, **after_kwargs)) - + after_callback(*after_args, **after_kwargs) return cast(R, result) except Exception: for exception_callback, after_args, after_kwargs in exception_callbacks: - await maybe_awaitable( - exception_callback(*after_args, **after_kwargs) - ) + exception_callback(*after_args, **after_kwargs) raise # To handle cancellation, we ensure that `after_callback`s and diff --git a/synapse/storage/databases/main/censor_events.py b/synapse/storage/databases/main/censor_events.py index fd3fc298b3..58177ecec1 100644 --- a/synapse/storage/databases/main/censor_events.py +++ b/synapse/storage/databases/main/censor_events.py @@ -194,7 +194,7 @@ class CensorEventsStore(EventsWorkerStore, CacheInvalidationWorkerStore, SQLBase # changed its content in the database. We can't call # self._invalidate_cache_and_stream because self.get_event_cache isn't of the # right type. - txn.call_after(self._get_event_cache.invalidate, (event.event_id,)) + self.invalidate_get_event_cache_after_txn(txn, event.event_id) # Send that invalidation to replication so that other workers also invalidate # the event cache. 
self._send_invalidation_to_replication( diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index fa2266ba20..156e1bd5ab 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1293,7 +1293,7 @@ class PersistEventsStore: depth_updates: Dict[str, int] = {} for event, context in events_and_contexts: # Remove the any existing cache entries for the event_ids - txn.call_after(self.store._invalidate_get_event_cache, event.event_id) + self.store.invalidate_get_event_cache_after_txn(txn, event.event_id) # Then update the `stream_ordering` position to mark the latest # event as the front of the room. This should not be done for # backfilled events because backfilled events have negative @@ -1675,7 +1675,7 @@ class PersistEventsStore: (cache_entry.event.event_id,), cache_entry ) - txn.call_after(prefill) + txn.async_call_after(prefill) def _store_redaction(self, txn: LoggingTransaction, event: EventBase) -> None: """Invalidate the caches for the redacted event. @@ -1684,7 +1684,7 @@ class PersistEventsStore: _invalidate_caches_for_event. """ assert event.redacts is not None - txn.call_after(self.store._invalidate_get_event_cache, event.redacts) + self.store.invalidate_get_event_cache_after_txn(txn, event.redacts) txn.call_after(self.store.get_relations_for_event.invalidate, (event.redacts,)) txn.call_after(self.store.get_applicable_edit.invalidate, (event.redacts,)) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index f3935bfead..4435373146 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -712,17 +712,41 @@ class EventsWorkerStore(SQLBaseStore): return event_entry_map - async def _invalidate_get_event_cache(self, event_id: str) -> None: - # First we invalidate the asynchronous cache instance. This may include - # out-of-process caches such as Redis/memcache. Once complete we can - # invalidate any in memory cache. The ordering is important here to - # ensure we don't pull in any remote invalid value after we invalidate - # the in-memory cache. + def invalidate_get_event_cache_after_txn( + self, txn: LoggingTransaction, event_id: str + ) -> None: + """ + Prepares a database transaction to invalidate the get event cache for a given + event ID when executed successfully. This is achieved by attaching two callbacks + to the transaction, one to invalidate the async cache and one for the in memory + sync cache (importantly called in that order). + + Arguments: + txn: the database transaction to attach the callbacks to + event_id: the event ID to be invalidated from caches + """ + + txn.async_call_after(self._invalidate_async_get_event_cache, event_id) + txn.call_after(self._invalidate_local_get_event_cache, event_id) + + async def _invalidate_async_get_event_cache(self, event_id: str) -> None: + """ + Invalidates an event in the asyncronous get event cache, which may be remote. + + Arguments: + event_id: the event ID to invalidate + """ + await self._get_event_cache.invalidate((event_id,)) - self._event_ref.pop(event_id, None) - self._current_event_fetches.pop(event_id, None) def _invalidate_local_get_event_cache(self, event_id: str) -> None: + """ + Invalidates an event in local in-memory get event caches. 
+ + Arguments: + event_id: the event ID to invalidate + """ + self._get_event_cache.invalidate_local((event_id,)) self._event_ref.pop(event_id, None) self._current_event_fetches.pop(event_id, None) @@ -958,7 +982,13 @@ class EventsWorkerStore(SQLBaseStore): } row_dict = self.db_pool.new_transaction( - conn, "do_fetch", [], [], self._fetch_event_rows, events_to_fetch + conn, + "do_fetch", + [], + [], + [], + self._fetch_event_rows, + events_to_fetch, ) # We only want to resolve deferreds from the main thread diff --git a/synapse/storage/databases/main/monthly_active_users.py b/synapse/storage/databases/main/monthly_active_users.py index 9a63f953fb..efd136a864 100644 --- a/synapse/storage/databases/main/monthly_active_users.py +++ b/synapse/storage/databases/main/monthly_active_users.py @@ -66,6 +66,7 @@ class MonthlyActiveUsersWorkerStore(RegistrationWorkerStore): "initialise_mau_threepids", [], [], + [], self._initialise_reserved_users, hs.config.server.mau_limits_reserved_threepids[: self._max_mau_value], ) diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py index 6d42276503..f6822707e4 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py @@ -304,7 +304,7 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore): self._invalidate_cache_and_stream( txn, self.have_seen_event, (room_id, event_id) ) - txn.call_after(self._invalidate_get_event_cache, event_id) + self.invalidate_get_event_cache_after_txn(txn, event_id) logger.info("[purge] done") From b9778673587941277e15b067ad39cdf084f7dde5 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 19 Jul 2022 12:45:17 +0100 Subject: [PATCH 151/178] Rate limit joins per-room (#13276) --- changelog.d/13276.feature | 1 + .../conf/workers-shared-extra.yaml.j2 | 4 + docs/upgrade.md | 10 + .../configuration/config_documentation.md | 19 ++ synapse/config/ratelimiting.py | 7 + synapse/federation/federation_server.py | 16 + synapse/handlers/federation_event.py | 4 + synapse/handlers/message.py | 11 + synapse/handlers/room_member.py | 37 +++ synapse/replication/tcp/client.py | 17 +- synapse/replication/tcp/streams/events.py | 1 + .../storage/databases/main/events_worker.py | 22 +- tests/federation/test_federation_server.py | 63 +++- tests/handlers/test_room_member.py | 290 ++++++++++++++++++ tests/rest/client/test_rooms.py | 4 +- tests/test_server.py | 2 +- tests/unittest.py | 4 +- tests/utils.py | 1 + 18 files changed, 498 insertions(+), 15 deletions(-) create mode 100644 changelog.d/13276.feature create mode 100644 tests/handlers/test_room_member.py diff --git a/changelog.d/13276.feature b/changelog.d/13276.feature new file mode 100644 index 0000000000..068d158ed5 --- /dev/null +++ b/changelog.d/13276.feature @@ -0,0 +1 @@ +Add per-room rate limiting for room joins. For each room, Synapse now monitors the rate of join events in that room, and throttle additional joins if that rate grows too large. 
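The `per_second`/`burst_count` semantics used by this rate limiter behave like a token bucket: tokens refill at `per_second` and the bucket holds at most `burst_count`, so a room can absorb a short burst of joins while sustained joins above the configured rate are rejected. A rough sketch of that behaviour, assuming token-bucket accounting (this is not Synapse's actual `Ratelimiter` class, and the names are hypothetical):

```python
import time
from typing import Dict, Optional, Tuple


class RoomJoinLimiter:
    """Rough token-bucket approximation of per_second/burst_count limiting.

    Illustrative only: not Synapse's actual Ratelimiter implementation.
    """

    def __init__(self, per_second: float, burst_count: float) -> None:
        self.rate = per_second
        self.capacity = burst_count
        # room_id -> (tokens remaining, timestamp of last update)
        self.buckets: Dict[str, Tuple[float, float]] = {}

    def can_join(self, room_id: str, now: Optional[float] = None) -> bool:
        if now is None:
            now = time.monotonic()
        tokens, last = self.buckets.get(room_id, (self.capacity, now))
        # Refill at `per_second`, capped at `burst_count`.
        tokens = min(self.capacity, tokens + (now - last) * self.rate)
        if tokens < 1:
            self.buckets[room_id] = (tokens, now)
            return False
        self.buckets[room_id] = (tokens - 1, now)
        return True


limiter = RoomJoinLimiter(per_second=1, burst_count=10)
results = [limiter.can_join("!room:example.org", now=0.0) for _ in range(12)]
print(results.count(True))  # 10: the burst is absorbed, further joins are throttled
```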
diff --git a/docker/complement/conf/workers-shared-extra.yaml.j2 b/docker/complement/conf/workers-shared-extra.yaml.j2 index b5f675bc73..9e554a865e 100644 --- a/docker/complement/conf/workers-shared-extra.yaml.j2 +++ b/docker/complement/conf/workers-shared-extra.yaml.j2 @@ -67,6 +67,10 @@ rc_joins: per_second: 9999 burst_count: 9999 +rc_joins_per_room: + per_second: 9999 + burst_count: 9999 + rc_3pid_validation: per_second: 1000 burst_count: 1000 diff --git a/docs/upgrade.md b/docs/upgrade.md index 3aaeb499ce..2c7c258909 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -104,6 +104,16 @@ minimum, a `notif_from` setting.) Specifying an `email` setting under `account_threepid_delegates` will now cause an error at startup. +## Changes to the event replication streams + +Synapse now includes a flag indicating if an event is an outlier when +replicating it to other workers. This is a forwards- and backwards-incompatible +change: v1.63 and workers cannot process events replicated by v1.64 workers, and +vice versa. + +Once all workers are upgraded to v1.64 (or downgraded to v1.63), event +replication will resume as normal. + # Upgrading to v1.62.0 ## New signatures for spam checker callbacks diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 5fe502e33a..be272a400c 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1471,6 +1471,25 @@ rc_joins: per_second: 0.03 burst_count: 12 ``` +--- +### `rc_joins_per_room` + +This option allows admins to ratelimit joins to a room based on the number of recent +joins (local or remote) to that room. It is intended to mitigate mass-join spam +waves which target multiple homeservers. + +By default, one join is permitted to a room every second, with an accumulating +buffer of up to ten instantaneous joins. + +Example configuration (default values): +```yaml +rc_joins_per_room: + per_second: 1 + burst_count: 10 +``` + +_Added in Synapse 1.64.0._ + --- ### `rc_3pid_validation` diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index 4fc1784efe..5a91917b4a 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -112,6 +112,13 @@ class RatelimitConfig(Config): defaults={"per_second": 0.01, "burst_count": 10}, ) + # Track the rate of joins to a given room. If there are too many, temporarily + # prevent local joins and remote joins via this server. + self.rc_joins_per_room = RateLimitConfig( + config.get("rc_joins_per_room", {}), + defaults={"per_second": 1, "burst_count": 10}, + ) + # Ratelimit cross-user key requests: # * For local requests this is keyed by the sending device. # * For requests received over federation this is keyed by the origin. 
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 5dfdc86740..ae550d3f4d 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -118,6 +118,7 @@ class FederationServer(FederationBase): self._federation_event_handler = hs.get_federation_event_handler() self.state = hs.get_state_handler() self._event_auth_handler = hs.get_event_auth_handler() + self._room_member_handler = hs.get_room_member_handler() self._state_storage_controller = hs.get_storage_controllers().state @@ -621,6 +622,15 @@ class FederationServer(FederationBase): ) raise IncompatibleRoomVersionError(room_version=room_version) + # Refuse the request if that room has seen too many joins recently. + # This is in addition to the HS-level rate limiting applied by + # BaseFederationServlet. + # type-ignore: mypy doesn't seem able to deduce the type of the limiter(!?) + await self._room_member_handler._join_rate_per_room_limiter.ratelimit( # type: ignore[has-type] + requester=None, + key=room_id, + update=False, + ) pdu = await self.handler.on_make_join_request(origin, room_id, user_id) return {"event": pdu.get_templated_pdu_json(), "room_version": room_version} @@ -655,6 +665,12 @@ class FederationServer(FederationBase): room_id: str, caller_supports_partial_state: bool = False, ) -> Dict[str, Any]: + await self._room_member_handler._join_rate_per_room_limiter.ratelimit( # type: ignore[has-type] + requester=None, + key=room_id, + update=False, + ) + event, context = await self._on_send_membership_event( origin, content, Membership.JOIN, room_id ) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index b1dab57447..766d9849f5 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1980,6 +1980,10 @@ class FederationEventHandler: event, event_pos, max_stream_token, extra_users=extra_users ) + if event.type == EventTypes.Member and event.membership == Membership.JOIN: + # TODO retrieve the previous state, and exclude join -> join transitions + self._notifier.notify_user_joined_room(event.event_id, event.room_id) + def _sanity_check_event(self, ev: EventBase) -> None: """ Do some early sanity checks of a received event diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 85abe71ea8..bd7baef051 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -463,6 +463,7 @@ class EventCreationHandler: ) self._events_shard_config = self.config.worker.events_shard_config self._instance_name = hs.get_instance_name() + self._notifier = hs.get_notifier() self.room_prejoin_state_types = self.hs.config.api.room_prejoin_state @@ -1550,6 +1551,16 @@ class EventCreationHandler: requester, is_admin_redaction=is_admin_redaction ) + if event.type == EventTypes.Member and event.membership == Membership.JOIN: + ( + current_membership, + _, + ) = await self.store.get_local_current_membership_for_user_in_room( + event.state_key, event.room_id + ) + if current_membership != Membership.JOIN: + self._notifier.notify_user_joined_room(event.event_id, event.room_id) + await self._maybe_kick_guest_users(event, context) if event.type == EventTypes.CanonicalAlias: diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index a5b9ac904e..30b4cb23df 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -94,12 +94,29 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): 
rate_hz=hs.config.ratelimiting.rc_joins_local.per_second, burst_count=hs.config.ratelimiting.rc_joins_local.burst_count, ) + # Tracks joins from local users to rooms this server isn't a member of. + # I.e. joins this server makes by requesting /make_join /send_join from + # another server. self._join_rate_limiter_remote = Ratelimiter( store=self.store, clock=self.clock, rate_hz=hs.config.ratelimiting.rc_joins_remote.per_second, burst_count=hs.config.ratelimiting.rc_joins_remote.burst_count, ) + # TODO: find a better place to keep this Ratelimiter. + # It needs to be + # - written to by event persistence code + # - written to by something which can snoop on replication streams + # - read by the RoomMemberHandler to rate limit joins from local users + # - read by the FederationServer to rate limit make_joins and send_joins from + # other homeservers + # I wonder if a homeserver-wide collection of rate limiters might be cleaner? + self._join_rate_per_room_limiter = Ratelimiter( + store=self.store, + clock=self.clock, + rate_hz=hs.config.ratelimiting.rc_joins_per_room.per_second, + burst_count=hs.config.ratelimiting.rc_joins_per_room.burst_count, + ) # Ratelimiter for invites, keyed by room (across all issuers, all # recipients). @@ -136,6 +153,18 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): ) self.request_ratelimiter = hs.get_request_ratelimiter() + hs.get_notifier().add_new_join_in_room_callback(self._on_user_joined_room) + + def _on_user_joined_room(self, event_id: str, room_id: str) -> None: + """Notify the rate limiter that a room join has occurred. + + Use this to inform the RoomMemberHandler about joins that have either + - taken place on another homeserver, or + - on another worker in this homeserver. + Joins actioned by this worker should use the usual `ratelimit` method, which + checks the limit and increments the counter in one go. + """ + self._join_rate_per_room_limiter.record_action(requester=None, key=room_id) @abc.abstractmethod async def _remote_join( @@ -396,6 +425,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): # up blocking profile updates. 
if newly_joined and ratelimit: await self._join_rate_limiter_local.ratelimit(requester) + await self._join_rate_per_room_limiter.ratelimit( + requester, key=room_id, update=False + ) result_event = await self.event_creation_handler.handle_new_client_event( requester, @@ -867,6 +899,11 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): await self._join_rate_limiter_remote.ratelimit( requester, ) + await self._join_rate_per_room_limiter.ratelimit( + requester, + key=room_id, + update=False, + ) inviter = await self._get_inviter(target.to_string(), room_id) if inviter and not self.hs.is_mine(inviter): diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 2f59245058..e4f2201c92 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -21,7 +21,7 @@ from twisted.internet.interfaces import IAddress, IConnector from twisted.internet.protocol import ReconnectingClientFactory from twisted.python.failure import Failure -from synapse.api.constants import EventTypes, ReceiptTypes +from synapse.api.constants import EventTypes, Membership, ReceiptTypes from synapse.federation import send_queue from synapse.federation.sender import FederationSender from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable @@ -219,6 +219,21 @@ class ReplicationDataHandler: membership=row.data.membership, ) + # If this event is a join, make a note of it so we have an accurate + # cross-worker room rate limit. + # TODO: Erik said we should exclude rows that came from ex_outliers + # here, but I don't see how we can determine that. I guess we could + # add a flag to row.data? + if ( + row.data.type == EventTypes.Member + and row.data.membership == Membership.JOIN + and not row.data.outlier + ): + # TODO retrieve the previous state, and exclude join -> join transitions + self.notifier.notify_user_joined_room( + row.data.event_id, row.data.room_id + ) + await self._presence_handler.process_replication_rows( stream_name, instance_name, token, rows ) diff --git a/synapse/replication/tcp/streams/events.py b/synapse/replication/tcp/streams/events.py index 26f4fa7cfd..14b6705862 100644 --- a/synapse/replication/tcp/streams/events.py +++ b/synapse/replication/tcp/streams/events.py @@ -98,6 +98,7 @@ class EventsStreamEventRow(BaseEventsStreamRow): relates_to: Optional[str] membership: Optional[str] rejected: bool + outlier: bool @attr.s(slots=True, frozen=True, auto_attribs=True) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 4435373146..5914a35420 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -1490,7 +1490,7 @@ class EventsWorkerStore(SQLBaseStore): async def get_all_new_forward_event_rows( self, instance_name: str, last_id: int, current_id: int, limit: int - ) -> List[Tuple[int, str, str, str, str, str, str, str, str]]: + ) -> List[Tuple[int, str, str, str, str, str, str, str, bool, bool]]: """Returns new events, for the Events replication stream Args: @@ -1506,10 +1506,11 @@ class EventsWorkerStore(SQLBaseStore): def get_all_new_forward_event_rows( txn: LoggingTransaction, - ) -> List[Tuple[int, str, str, str, str, str, str, str, str]]: + ) -> List[Tuple[int, str, str, str, str, str, str, str, bool, bool]]: sql = ( "SELECT e.stream_ordering, e.event_id, e.room_id, e.type," - " se.state_key, redacts, relates_to_id, membership, rejections.reason IS NOT NULL" + " se.state_key, redacts, relates_to_id, 
membership, rejections.reason IS NOT NULL," + " e.outlier" " FROM events AS e" " LEFT JOIN redactions USING (event_id)" " LEFT JOIN state_events AS se USING (event_id)" @@ -1523,7 +1524,8 @@ class EventsWorkerStore(SQLBaseStore): ) txn.execute(sql, (last_id, current_id, instance_name, limit)) return cast( - List[Tuple[int, str, str, str, str, str, str, str, str]], txn.fetchall() + List[Tuple[int, str, str, str, str, str, str, str, bool, bool]], + txn.fetchall(), ) return await self.db_pool.runInteraction( @@ -1532,7 +1534,7 @@ class EventsWorkerStore(SQLBaseStore): async def get_ex_outlier_stream_rows( self, instance_name: str, last_id: int, current_id: int - ) -> List[Tuple[int, str, str, str, str, str, str, str, str]]: + ) -> List[Tuple[int, str, str, str, str, str, str, str, bool, bool]]: """Returns de-outliered events, for the Events replication stream Args: @@ -1547,11 +1549,14 @@ class EventsWorkerStore(SQLBaseStore): def get_ex_outlier_stream_rows_txn( txn: LoggingTransaction, - ) -> List[Tuple[int, str, str, str, str, str, str, str, str]]: + ) -> List[Tuple[int, str, str, str, str, str, str, str, bool, bool]]: sql = ( "SELECT event_stream_ordering, e.event_id, e.room_id, e.type," - " se.state_key, redacts, relates_to_id, membership, rejections.reason IS NOT NULL" + " se.state_key, redacts, relates_to_id, membership, rejections.reason IS NOT NULL," + " e.outlier" " FROM events AS e" + # NB: the next line (inner join) is what makes this query different from + # get_all_new_forward_event_rows. " INNER JOIN ex_outlier_stream AS out USING (event_id)" " LEFT JOIN redactions USING (event_id)" " LEFT JOIN state_events AS se USING (event_id)" @@ -1566,7 +1571,8 @@ class EventsWorkerStore(SQLBaseStore): txn.execute(sql, (last_id, current_id, instance_name)) return cast( - List[Tuple[int, str, str, str, str, str, str, str, str]], txn.fetchall() + List[Tuple[int, str, str, str, str, str, str, str, bool, bool]], + txn.fetchall(), ) return await self.db_pool.runInteraction( diff --git a/tests/federation/test_federation_server.py b/tests/federation/test_federation_server.py index 8ea13ceb93..3a6ef221ae 100644 --- a/tests/federation/test_federation_server.py +++ b/tests/federation/test_federation_server.py @@ -148,7 +148,7 @@ class SendJoinFederationTests(unittest.FederatingHomeserverTestCase): tok2 = self.login("fozzie", "bear") self.helper.join(self._room_id, second_member_user_id, tok=tok2) - def _make_join(self, user_id) -> JsonDict: + def _make_join(self, user_id: str) -> JsonDict: channel = self.make_signed_federation_request( "GET", f"/_matrix/federation/v1/make_join/{self._room_id}/{user_id}" @@ -260,6 +260,67 @@ class SendJoinFederationTests(unittest.FederatingHomeserverTestCase): ) self.assertEqual(r[("m.room.member", joining_user)].membership, "join") + @override_config({"rc_joins_per_room": {"per_second": 0, "burst_count": 3}}) + def test_make_join_respects_room_join_rate_limit(self) -> None: + # In the test setup, two users join the room. Since the rate limiter burst + # count is 3, a new make_join request to the room should be accepted. + + joining_user = "@ronniecorbett:" + self.OTHER_SERVER_NAME + self._make_join(joining_user) + + # Now have a new local user join the room. This saturates the rate limiter + # bucket, so the next make_join should be denied. 
+ new_local_user = self.register_user("animal", "animal") + token = self.login("animal", "animal") + self.helper.join(self._room_id, new_local_user, tok=token) + + joining_user = "@ronniebarker:" + self.OTHER_SERVER_NAME + channel = self.make_signed_federation_request( + "GET", + f"/_matrix/federation/v1/make_join/{self._room_id}/{joining_user}" + f"?ver={DEFAULT_ROOM_VERSION}", + ) + self.assertEqual(channel.code, HTTPStatus.TOO_MANY_REQUESTS, channel.json_body) + + @override_config({"rc_joins_per_room": {"per_second": 0, "burst_count": 3}}) + def test_send_join_contributes_to_room_join_rate_limit_and_is_limited(self) -> None: + # Make two make_join requests up front. (These are rate limited, but do not + # contribute to the rate limit.) + join_event_dicts = [] + for i in range(2): + joining_user = f"@misspiggy{i}:{self.OTHER_SERVER_NAME}" + join_result = self._make_join(joining_user) + join_event_dict = join_result["event"] + self.add_hashes_and_signatures_from_other_server( + join_event_dict, + KNOWN_ROOM_VERSIONS[DEFAULT_ROOM_VERSION], + ) + join_event_dicts.append(join_event_dict) + + # In the test setup, two users join the room. Since the rate limiter burst + # count is 3, the first send_join should be accepted... + channel = self.make_signed_federation_request( + "PUT", + f"/_matrix/federation/v2/send_join/{self._room_id}/join0", + content=join_event_dicts[0], + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # ... but the second should be denied. + channel = self.make_signed_federation_request( + "PUT", + f"/_matrix/federation/v2/send_join/{self._room_id}/join1", + content=join_event_dicts[1], + ) + self.assertEqual(channel.code, HTTPStatus.TOO_MANY_REQUESTS, channel.json_body) + + # NB: we could write a test which checks that the send_join event is seen + # by other workers over replication, and that they update their rate limit + # buckets accordingly. I'm going to assume that the join event gets sent over + # replication, at which point the tests.handlers.room_member test + # test_local_users_joining_on_another_worker_contribute_to_rate_limit + # is probably sufficient to reassure that the bucket is updated. 
+ def _create_acl_event(content): return make_event_from_dict( diff --git a/tests/handlers/test_room_member.py b/tests/handlers/test_room_member.py new file mode 100644 index 0000000000..254e7e4b80 --- /dev/null +++ b/tests/handlers/test_room_member.py @@ -0,0 +1,290 @@ +from http import HTTPStatus +from unittest.mock import Mock, patch + +from twisted.test.proto_helpers import MemoryReactor + +import synapse.rest.admin +import synapse.rest.client.login +import synapse.rest.client.room +from synapse.api.constants import EventTypes, Membership +from synapse.api.errors import LimitExceededError +from synapse.crypto.event_signing import add_hashes_and_signatures +from synapse.events import FrozenEventV3 +from synapse.federation.federation_client import SendJoinResult +from synapse.server import HomeServer +from synapse.types import UserID, create_requester +from synapse.util import Clock + +from tests.replication._base import RedisMultiWorkerStreamTestCase +from tests.server import make_request +from tests.test_utils import make_awaitable +from tests.unittest import FederatingHomeserverTestCase, override_config + + +class TestJoinsLimitedByPerRoomRateLimiter(FederatingHomeserverTestCase): + servlets = [ + synapse.rest.admin.register_servlets, + synapse.rest.client.login.register_servlets, + synapse.rest.client.room.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.handler = hs.get_room_member_handler() + + # Create three users. + self.alice = self.register_user("alice", "pass") + self.alice_token = self.login("alice", "pass") + self.bob = self.register_user("bob", "pass") + self.bob_token = self.login("bob", "pass") + self.chris = self.register_user("chris", "pass") + self.chris_token = self.login("chris", "pass") + + # Create a room on this homeserver. Note that this counts as a join: it + # contributes to the rate limter's count of actions + self.room_id = self.helper.create_room_as(self.alice, tok=self.alice_token) + + self.intially_unjoined_room_id = f"!example:{self.OTHER_SERVER_NAME}" + + @override_config({"rc_joins_per_room": {"per_second": 0, "burst_count": 2}}) + def test_local_user_local_joins_contribute_to_limit_and_are_limited(self) -> None: + # The rate limiter has accumulated one token from Alice's join after the create + # event. + # Try joining the room as Bob. + self.get_success( + self.handler.update_membership( + requester=create_requester(self.bob), + target=UserID.from_string(self.bob), + room_id=self.room_id, + action=Membership.JOIN, + ) + ) + + # The rate limiter bucket is full. A second join should be denied. + self.get_failure( + self.handler.update_membership( + requester=create_requester(self.chris), + target=UserID.from_string(self.chris), + room_id=self.room_id, + action=Membership.JOIN, + ), + LimitExceededError, + ) + + @override_config({"rc_joins_per_room": {"per_second": 0, "burst_count": 2}}) + def test_local_user_profile_edits_dont_contribute_to_limit(self) -> None: + # The rate limiter has accumulated one token from Alice's join after the create + # event. Alice should still be able to change her displayname. + self.get_success( + self.handler.update_membership( + requester=create_requester(self.alice), + target=UserID.from_string(self.alice), + room_id=self.room_id, + action=Membership.JOIN, + content={"displayname": "Alice Cooper"}, + ) + ) + + # Still room in the limiter bucket. Chris's join should be accepted. 
+ self.get_success( + self.handler.update_membership( + requester=create_requester(self.chris), + target=UserID.from_string(self.chris), + room_id=self.room_id, + action=Membership.JOIN, + ) + ) + + @override_config({"rc_joins_per_room": {"per_second": 0, "burst_count": 1}}) + def test_remote_joins_contribute_to_rate_limit(self) -> None: + # Join once, to fill the rate limiter bucket. + # + # To do this we have to mock the responses from the remote homeserver. + # We also patch out a bunch of event checks on our end. All we're really + # trying to check here is that remote joins will bump the rate limter when + # they are persisted. + create_event_source = { + "auth_events": [], + "content": { + "creator": f"@creator:{self.OTHER_SERVER_NAME}", + "room_version": self.hs.config.server.default_room_version.identifier, + }, + "depth": 0, + "origin_server_ts": 0, + "prev_events": [], + "room_id": self.intially_unjoined_room_id, + "sender": f"@creator:{self.OTHER_SERVER_NAME}", + "state_key": "", + "type": EventTypes.Create, + } + self.add_hashes_and_signatures_from_other_server( + create_event_source, + self.hs.config.server.default_room_version, + ) + create_event = FrozenEventV3( + create_event_source, + self.hs.config.server.default_room_version, + {}, + None, + ) + + join_event_source = { + "auth_events": [create_event.event_id], + "content": {"membership": "join"}, + "depth": 1, + "origin_server_ts": 100, + "prev_events": [create_event.event_id], + "sender": self.bob, + "state_key": self.bob, + "room_id": self.intially_unjoined_room_id, + "type": EventTypes.Member, + } + add_hashes_and_signatures( + self.hs.config.server.default_room_version, + join_event_source, + self.hs.hostname, + self.hs.signing_key, + ) + join_event = FrozenEventV3( + join_event_source, + self.hs.config.server.default_room_version, + {}, + None, + ) + + mock_make_membership_event = Mock( + return_value=make_awaitable( + ( + self.OTHER_SERVER_NAME, + join_event, + self.hs.config.server.default_room_version, + ) + ) + ) + mock_send_join = Mock( + return_value=make_awaitable( + SendJoinResult( + join_event, + self.OTHER_SERVER_NAME, + state=[create_event], + auth_chain=[create_event], + partial_state=False, + servers_in_room=[], + ) + ) + ) + + with patch.object( + self.handler.federation_handler.federation_client, + "make_membership_event", + mock_make_membership_event, + ), patch.object( + self.handler.federation_handler.federation_client, + "send_join", + mock_send_join, + ), patch( + "synapse.event_auth._is_membership_change_allowed", + return_value=None, + ), patch( + "synapse.handlers.federation_event.check_state_dependent_auth_rules", + return_value=None, + ): + self.get_success( + self.handler.update_membership( + requester=create_requester(self.bob), + target=UserID.from_string(self.bob), + room_id=self.intially_unjoined_room_id, + action=Membership.JOIN, + remote_room_hosts=[self.OTHER_SERVER_NAME], + ) + ) + + # Try to join as Chris. Should get denied. + self.get_failure( + self.handler.update_membership( + requester=create_requester(self.chris), + target=UserID.from_string(self.chris), + room_id=self.intially_unjoined_room_id, + action=Membership.JOIN, + remote_room_hosts=[self.OTHER_SERVER_NAME], + ), + LimitExceededError, + ) + + # TODO: test that remote joins to a room are rate limited. + # Could do this by setting the burst count to 1, then: + # - remote-joining a room + # - immediately leaving + # - trying to remote-join again. 
+ + +class TestReplicatedJoinsLimitedByPerRoomRateLimiter(RedisMultiWorkerStreamTestCase): + servlets = [ + synapse.rest.admin.register_servlets, + synapse.rest.client.login.register_servlets, + synapse.rest.client.room.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.handler = hs.get_room_member_handler() + + # Create three users. + self.alice = self.register_user("alice", "pass") + self.alice_token = self.login("alice", "pass") + self.bob = self.register_user("bob", "pass") + self.bob_token = self.login("bob", "pass") + self.chris = self.register_user("chris", "pass") + self.chris_token = self.login("chris", "pass") + + # Create a room on this homeserver. + # Note that this counts as a + self.room_id = self.helper.create_room_as(self.alice, tok=self.alice_token) + self.intially_unjoined_room_id = "!example:otherhs" + + @override_config({"rc_joins_per_room": {"per_second": 0, "burst_count": 2}}) + def test_local_users_joining_on_another_worker_contribute_to_rate_limit( + self, + ) -> None: + # The rate limiter has accumulated one token from Alice's join after the create + # event. + self.replicate() + + # Spawn another worker and have bob join via it. + worker_app = self.make_worker_hs( + "synapse.app.generic_worker", extra_config={"worker_name": "other worker"} + ) + worker_site = self._hs_to_site[worker_app] + channel = make_request( + self.reactor, + worker_site, + "POST", + f"/_matrix/client/v3/rooms/{self.room_id}/join", + access_token=self.bob_token, + ) + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) + + # wait for join to arrive over replication + self.replicate() + + # Try to join as Chris on the worker. Should get denied because Alice + # and Bob have both joined the room. + self.get_failure( + worker_app.get_room_member_handler().update_membership( + requester=create_requester(self.chris), + target=UserID.from_string(self.chris), + room_id=self.room_id, + action=Membership.JOIN, + ), + LimitExceededError, + ) + + # Try to join as Chris on the original worker. Should get denied because Alice + # and Bob have both joined the room. 
+ self.get_failure( + self.handler.update_membership( + requester=create_requester(self.chris), + target=UserID.from_string(self.chris), + room_id=self.room_id, + action=Membership.JOIN, + ), + LimitExceededError, + ) diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 17571b2d33..c45cb32090 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -710,7 +710,7 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(HTTPStatus.OK, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(43, channel.resource_usage.db_txn_count) + self.assertEqual(44, channel.resource_usage.db_txn_count) def test_post_room_initial_state(self) -> None: # POST with initial_state config key, expect new room id @@ -723,7 +723,7 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(HTTPStatus.OK, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(49, channel.resource_usage.db_txn_count) + self.assertEqual(50, channel.resource_usage.db_txn_count) def test_post_room_visibility_key(self) -> None: # POST with visibility config key, expect new room id diff --git a/tests/test_server.py b/tests/test_server.py index fc4bce899c..2fe4411401 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -231,7 +231,7 @@ class OptionsResourceTests(unittest.TestCase): parse_listener_def({"type": "http", "port": 0}), self.resource, "1.0", - max_request_body_size=1234, + max_request_body_size=4096, reactor=self.reactor, ) diff --git a/tests/unittest.py b/tests/unittest.py index 9f1ff774a8..66ce92f4a6 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -284,7 +284,7 @@ class HomeserverTestCase(TestCase): config=self.hs.config.server.listeners[0], resource=self.resource, server_version_string="1", - max_request_body_size=1234, + max_request_body_size=4096, reactor=self.reactor, ) @@ -773,7 +773,7 @@ class FederatingHomeserverTestCase(HomeserverTestCase): verify_key_id, FetchKeyResult( verify_key=verify_key, - valid_until_ts=clock.time_msec() + 1000, + valid_until_ts=clock.time_msec() + 10000, ), ) ], diff --git a/tests/utils.py b/tests/utils.py index 424cc4c2a0..d2c6d1e852 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -167,6 +167,7 @@ def default_config( "local": {"per_second": 10000, "burst_count": 10000}, "remote": {"per_second": 10000, "burst_count": 10000}, }, + "rc_joins_per_room": {"per_second": 10000, "burst_count": 10000}, "rc_invites": { "per_room": {"per_second": 10000, "burst_count": 10000}, "per_user": {"per_second": 10000, "burst_count": 10000}, From 87a917e8c8bd8d999e0a4f5badb58a470ab83ca2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Behrmann?= Date: Tue, 19 Jul 2022 14:36:29 +0200 Subject: [PATCH 152/178] Add notes when config options were changed to config documentation (#13314) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jörg Behrmann --- changelog.d/13314.doc | 1 + docs/usage/configuration/config_documentation.md | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 changelog.d/13314.doc diff --git a/changelog.d/13314.doc b/changelog.d/13314.doc new file mode 100644 index 0000000000..75c71ef27a --- /dev/null +++ b/changelog.d/13314.doc @@ -0,0 +1 @@ +Add notes when config options where changed. Contributed by @behrmann. 
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index be272a400c..995c5052f1 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -239,6 +239,8 @@ If this option is provided, it parses the given yaml to json and serves it on `/.well-known/matrix/client` endpoint alongside the standard properties. +*Added in Synapse 1.62.0.* + Example configuration: ```yaml extra_well_known_client_content : @@ -1155,6 +1157,9 @@ Caching can be configured through the following sub-options: with intermittent connections, at the cost of higher memory usage. A value of zero means that sync responses are not cached. Defaults to 2m. + + *Changed in Synapse 1.62.0*: The default was changed from 0 to 2m. + * `cache_autotuning` and its sub-options `max_cache_memory_usage`, `target_cache_memory_usage`, and `min_cache_ttl` work in conjunction with each other to maintain a balance between cache memory usage and cache entry availability. You must be using [jemalloc](https://github.com/matrix-org/synapse#help-synapse-is-slow-and-eats-all-my-ramcpu) From 84c5e6b1fd67931c919901c733647ff0f61e2759 Mon Sep 17 00:00:00 2001 From: villepeh <100730729+villepeh@users.noreply.github.com> Date: Tue, 19 Jul 2022 15:37:20 +0300 Subject: [PATCH 153/178] Bash script for creating multiple stream writers (#13271) Add another bash script to the contrib directory. It creates multiple stream writers and also prints out the example configuration for homeserver.yaml. Signed-off-by: Ville Petteri Huh. --- changelog.d/13271.doc | 1 + ....md => create-multiple-generic-workers.md} | 2 +- .../create-multiple-stream-writers.md | 145 ++++++++++++++++++ 3 files changed, 147 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13271.doc rename contrib/workers-bash-scripts/{create-multiple-workers.md => create-multiple-generic-workers.md} (93%) create mode 100644 contrib/workers-bash-scripts/create-multiple-stream-writers.md diff --git a/changelog.d/13271.doc b/changelog.d/13271.doc new file mode 100644 index 0000000000..b50e60d029 --- /dev/null +++ b/changelog.d/13271.doc @@ -0,0 +1 @@ +Add another `contrib` script to help set up worker processes. Contributed by @villepeh. diff --git a/contrib/workers-bash-scripts/create-multiple-workers.md b/contrib/workers-bash-scripts/create-multiple-generic-workers.md similarity index 93% rename from contrib/workers-bash-scripts/create-multiple-workers.md rename to contrib/workers-bash-scripts/create-multiple-generic-workers.md index ad5142fe15..d303101429 100644 --- a/contrib/workers-bash-scripts/create-multiple-workers.md +++ b/contrib/workers-bash-scripts/create-multiple-generic-workers.md @@ -1,4 +1,4 @@ -# Creating multiple workers with a bash script +# Creating multiple generic workers with a bash script Setting up multiple worker configuration files manually can be time-consuming. You can alternatively create multiple worker configuration files with a simple `bash` script. For example: diff --git a/contrib/workers-bash-scripts/create-multiple-stream-writers.md b/contrib/workers-bash-scripts/create-multiple-stream-writers.md new file mode 100644 index 0000000000..0d2ca780a6 --- /dev/null +++ b/contrib/workers-bash-scripts/create-multiple-stream-writers.md @@ -0,0 +1,145 @@ +# Creating multiple stream writers with a bash script + +This script creates multiple [stream writer](https://github.com/matrix-org/synapse/blob/develop/docs/workers.md#stream-writers) workers. 
+ +Stream writers require both replication and HTTP listeners. + +It also prints out the example lines for Synapse main configuration file. + +Remember to route necessary endpoints directly to a worker associated with it. + +If you run the script as-is, it will create workers with the replication listener starting from port 8034 and another, regular http listener starting from 8044. If you don't need all of the stream writers listed in the script, just remove them from the ```STREAM_WRITERS``` array. + +```sh +#!/bin/bash + +# Start with these replication and http ports. +# The script loop starts with the exact port and then increments it by one. +REP_START_PORT=8034 +HTTP_START_PORT=8044 + +# Stream writer workers to generate. Feel free to add or remove them as you wish. +# Event persister ("events") isn't included here as it does not require its +# own HTTP listener. + +STREAM_WRITERS+=( "presence" "typing" "receipts" "to_device" "account_data" ) + +NUM_WRITERS=$(expr ${#STREAM_WRITERS[@]}) + +i=0 + +while [ $i -lt "$NUM_WRITERS" ] +do +cat << EOF > ${STREAM_WRITERS[$i]}_stream_writer.yaml +worker_app: synapse.app.generic_worker +worker_name: ${STREAM_WRITERS[$i]}_stream_writer + +# The replication listener on the main synapse process. +worker_replication_host: 127.0.0.1 +worker_replication_http_port: 9093 + +worker_listeners: + - type: http + port: $(expr $REP_START_PORT + $i) + resources: + - names: [replication] + + - type: http + port: $(expr $HTTP_START_PORT + $i) + resources: + - names: [client] + +worker_log_config: /etc/matrix-synapse/stream-writer-log.yaml +EOF +HOMESERVER_YAML_INSTANCE_MAP+=$" ${STREAM_WRITERS[$i]}_stream_writer: + host: 127.0.0.1 + port: $(expr $REP_START_PORT + $i) +" + +HOMESERVER_YAML_STREAM_WRITERS+=$" ${STREAM_WRITERS[$i]}: ${STREAM_WRITERS[$i]}_stream_writer +" + +((i++)) +done + +cat << EXAMPLECONFIG +# Add these lines to your homeserver.yaml. +# Don't forget to configure your reverse proxy and +# necessary endpoints to their respective worker. + +# See https://github.com/matrix-org/synapse/blob/develop/docs/workers.md +# for more information. + +# Remember: Under NO circumstances should the replication +# listener be exposed to the public internet; +# it has no authentication and is unencrypted. + +instance_map: +$HOMESERVER_YAML_INSTANCE_MAP +stream_writers: +$HOMESERVER_YAML_STREAM_WRITERS +EXAMPLECONFIG +``` + +Copy the code above save it to a file ```create_stream_writers.sh``` (for example). + +Make the script executable by running ```chmod +x create_stream_writers.sh```. + +## Run the script to create workers and print out a sample configuration + +Simply run the script to create YAML files in the current folder and print out the required configuration for ```homeserver.yaml```. + +```console +$ ./create_stream_writers.sh + +# Add these lines to your homeserver.yaml. +# Don't forget to configure your reverse proxy and +# necessary endpoints to their respective worker. + +# See https://github.com/matrix-org/synapse/blob/develop/docs/workers.md +# for more information + +# Remember: Under NO circumstances should the replication +# listener be exposed to the public internet; +# it has no authentication and is unencrypted. 
+ +instance_map: + presence_stream_writer: + host: 127.0.0.1 + port: 8034 + typing_stream_writer: + host: 127.0.0.1 + port: 8035 + receipts_stream_writer: + host: 127.0.0.1 + port: 8036 + to_device_stream_writer: + host: 127.0.0.1 + port: 8037 + account_data_stream_writer: + host: 127.0.0.1 + port: 8038 + +stream_writers: + presence: presence_stream_writer + typing: typing_stream_writer + receipts: receipts_stream_writer + to_device: to_device_stream_writer + account_data: account_data_stream_writer +``` + +Simply copy-and-paste the output to an appropriate place in your Synapse main configuration file. + +## Write directly to Synapse configuration file + +You could also write the output directly to homeserver main configuration file. **This, however, is not recommended** as even a small typo (such as replacing >> with >) can erase the entire ```homeserver.yaml```. + +If you do this, back up your original configuration file first: + +```console +# Back up homeserver.yaml first +cp /etc/matrix-synapse/homeserver.yaml /etc/matrix-synapse/homeserver.yaml.bak + +# Create workers and write output to your homeserver.yaml +./create_stream_writers.sh >> /etc/matrix-synapse/homeserver.yaml +``` From 6faaf76a3239c3fd1a9adc0b45b04fcf43237824 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Tue, 19 Jul 2022 13:38:29 +0100 Subject: [PATCH 154/178] Remove 'anonymised' from the phone home stats documentation (#13321) --- CHANGES.md | 2 +- changelog.d/13321.doc | 1 + debian/changelog | 7 +++++++ debian/matrix-synapse-py3.postinst | 2 +- debian/po/templates.pot | 12 ++++++------ debian/templates | 13 +++++++------ docs/SUMMARY.md | 2 +- ...md => reporting_homeserver_usage_statistics.md} | 8 ++++---- docs/usage/configuration/config_documentation.md | 11 ++++++++--- synapse/_scripts/generate_config.py | 2 +- synapse/config/_base.py | 14 +++++++------- 11 files changed, 44 insertions(+), 30 deletions(-) create mode 100644 changelog.d/13321.doc rename docs/usage/administration/monitoring/{reporting_anonymised_statistics.md => reporting_homeserver_usage_statistics.md} (98%) diff --git a/CHANGES.md b/CHANGES.md index 4071f973de..a1f918986f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -34,7 +34,7 @@ Improved Documentation - Add an explanation of the `--report-stats` argument to the docs. ([\#13029](https://github.com/matrix-org/synapse/issues/13029)) - Add a helpful example bash script to the contrib directory for creating multiple worker configuration files of the same type. Contributed by @villepeh. ([\#13032](https://github.com/matrix-org/synapse/issues/13032)) - Add missing links to config options. ([\#13166](https://github.com/matrix-org/synapse/issues/13166)) -- Add documentation for anonymised homeserver statistics collection. ([\#13086](https://github.com/matrix-org/synapse/issues/13086)) +- Add documentation for homeserver usage statistics collection. ([\#13086](https://github.com/matrix-org/synapse/issues/13086)) - Add documentation for the existing `databases` option in the homeserver configuration manual. ([\#13212](https://github.com/matrix-org/synapse/issues/13212)) - Clean up references to sample configuration and redirect users to the configuration manual instead. ([\#13077](https://github.com/matrix-org/synapse/issues/13077), [\#13139](https://github.com/matrix-org/synapse/issues/13139)) - Document how the Synapse team does reviews. 
([\#13132](https://github.com/matrix-org/synapse/issues/13132)) diff --git a/changelog.d/13321.doc b/changelog.d/13321.doc new file mode 100644 index 0000000000..a7469e9242 --- /dev/null +++ b/changelog.d/13321.doc @@ -0,0 +1 @@ +Clarify that homeserver server names are included in the reported data when the `report_stats` config option is enabled. diff --git a/debian/changelog b/debian/changelog index 9f4352586d..a9dbe34705 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +matrix-synapse-py3 (1.63.0~rc1+nmu1) UNRELEASED; urgency=medium + + * Clarify that homeserver server names are included in the data reported + by opt-in server stats reporting (`report_stats` homeserver config option). + + -- Synapse Packaging team Tue, 19 Jul 2022 12:00:14 +0100 + matrix-synapse-py3 (1.63.0~rc1) stable; urgency=medium * New Synapse release 1.63.0rc1. diff --git a/debian/matrix-synapse-py3.postinst b/debian/matrix-synapse-py3.postinst index a8dde1e082..029b9e0243 100644 --- a/debian/matrix-synapse-py3.postinst +++ b/debian/matrix-synapse-py3.postinst @@ -31,7 +31,7 @@ EOF # This file is autogenerated, and will be recreated on upgrade if it is deleted. # Any changes you make will be preserved. -# Whether to report anonymized homeserver usage statistics. +# Whether to report homeserver usage statistics. report_stats: false EOF fi diff --git a/debian/po/templates.pot b/debian/po/templates.pot index f0af9e70fb..445e4aac81 100644 --- a/debian/po/templates.pot +++ b/debian/po/templates.pot @@ -37,7 +37,7 @@ msgstr "" #. Type: boolean #. Description #: ../templates:2001 -msgid "Report anonymous statistics?" +msgid "Report homeserver usage statistics?" msgstr "" #. Type: boolean @@ -45,11 +45,11 @@ msgstr "" #: ../templates:2001 msgid "" "Developers of Matrix and Synapse really appreciate helping the project out " -"by reporting anonymized usage statistics from this homeserver. Only very " -"basic aggregate data (e.g. number of users) will be reported, but it helps " -"track the growth of the Matrix community, and helps in making Matrix a " -"success, as well as to convince other networks that they should peer with " -"Matrix." +"by reporting homeserver usage statistics from this homeserver. Your " +"homeserver's server name, along with very basic aggregate data (e.g. " +"number of users) will be reported. But it helps track the growth of the " +"Matrix community, and helps in making Matrix a success, as well as to " +"convince other networks that they should peer with Matrix." msgstr "" #. Type: boolean diff --git a/debian/templates b/debian/templates index 458fe8bbe9..23e24e1059 100644 --- a/debian/templates +++ b/debian/templates @@ -10,12 +10,13 @@ _Description: Name of the server: Template: matrix-synapse/report-stats Type: boolean Default: false -_Description: Report anonymous statistics? +_Description: Report homeserver usage statistics? Developers of Matrix and Synapse really appreciate helping the - project out by reporting anonymized usage statistics from this - homeserver. Only very basic aggregate data (e.g. number of users) - will be reported, but it helps track the growth of the Matrix - community, and helps in making Matrix a success, as well as to - convince other networks that they should peer with Matrix. + project out by reporting homeserver usage statistics from this + homeserver. Your homeserver's server name, along with very basic + aggregate data (e.g. number of users) will be reported. 
But it + helps track the growth of the Matrix community, and helps in + making Matrix a success, as well as to convince other networks + that they should peer with Matrix. . Thank you. diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 8d6030e34a..2e8c4e2137 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -69,7 +69,7 @@ - [Federation](usage/administration/admin_api/federation.md) - [Manhole](manhole.md) - [Monitoring](metrics-howto.md) - - [Reporting Anonymised Statistics](usage/administration/monitoring/reporting_anonymised_statistics.md) + - [Reporting Homeserver Usage Statistics](usage/administration/monitoring/reporting_homeserver_usage_statistics.md) - [Understanding Synapse Through Grafana Graphs](usage/administration/understanding_synapse_through_grafana_graphs.md) - [Useful SQL for Admins](usage/administration/useful_sql_for_admins.md) - [Database Maintenance Tools](usage/administration/database_maintenance_tools.md) diff --git a/docs/usage/administration/monitoring/reporting_anonymised_statistics.md b/docs/usage/administration/monitoring/reporting_homeserver_usage_statistics.md similarity index 98% rename from docs/usage/administration/monitoring/reporting_anonymised_statistics.md rename to docs/usage/administration/monitoring/reporting_homeserver_usage_statistics.md index 4f1e0fecf5..4e53f9883a 100644 --- a/docs/usage/administration/monitoring/reporting_anonymised_statistics.md +++ b/docs/usage/administration/monitoring/reporting_homeserver_usage_statistics.md @@ -1,11 +1,11 @@ -# Reporting Anonymised Statistics +# Reporting Homeserver Usage Statistics When generating your Synapse configuration file, you are asked whether you -would like to report anonymised statistics to Matrix.org. These statistics +would like to report usage statistics to Matrix.org. These statistics provide the foundation a glimpse into the number of Synapse homeservers participating in the network, as well as statistics such as the number of rooms being created and messages being sent. This feature is sometimes -affectionately called "phone-home" stats. Reporting +affectionately called "phone home" stats. Reporting [is optional](../../configuration/config_documentation.md#report_stats) and the reporting endpoint [can be configured](../../configuration/config_documentation.md#report_stats_endpoint), @@ -21,9 +21,9 @@ The following statistics are sent to the configured reporting endpoint: | Statistic Name | Type | Description | |----------------------------|--------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `homeserver` | string | The homeserver's server name. | | `memory_rss` | int | The memory usage of the process (in kilobytes on Unix-based systems, bytes on MacOS). | | `cpu_average` | int | CPU time in % of a single core (not % of all cores). | -| `homeserver` | string | The homeserver's server name. | | `server_context` | string | An arbitrary string used to group statistics from a set of homeservers. | | `timestamp` | int | The current time, represented as the number of seconds since the epoch. | | `uptime_seconds` | int | The number of seconds since the homeserver was last started. 
| diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 5deabb53d7..6d7d700a13 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -2409,9 +2409,14 @@ metrics_flags: --- ### `report_stats` -Whether or not to report anonymized homeserver usage statistics. This is originally +Whether or not to report homeserver usage statistics. This is originally set when generating the config. Set this option to true or false to change the current -behavior. +behavior. See +[Reporting Homeserver Usage Statistics](../administration/monitoring/reporting_homeserver_usage_statistics.md) +for information on what data is reported. + +Statistics will be reported 5 minutes after Synapse starts, and then every 3 hours +after that. Example configuration: ```yaml @@ -2420,7 +2425,7 @@ report_stats: true --- ### `report_stats_endpoint` -The endpoint to report the anonymized homeserver usage statistics to. +The endpoint to report homeserver usage statistics to. Defaults to https://matrix.org/report-usage-stats/push Example configuration: diff --git a/synapse/_scripts/generate_config.py b/synapse/_scripts/generate_config.py index 08eb8ef114..06c11c60da 100755 --- a/synapse/_scripts/generate_config.py +++ b/synapse/_scripts/generate_config.py @@ -33,7 +33,7 @@ def main() -> None: parser.add_argument( "--report-stats", action="store", - help="Whether the generated config reports anonymized usage statistics", + help="Whether the generated config reports homeserver usage statistics", choices=["yes", "no"], ) diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 095eca16c5..7c9cf403ef 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -97,16 +97,16 @@ def format_config_error(e: ConfigError) -> Iterator[str]: # We split these messages out to allow packages to override with package # specific instructions. MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS = """\ -Please opt in or out of reporting anonymized homeserver usage statistics, by -setting the `report_stats` key in your config file to either True or False. +Please opt in or out of reporting homeserver usage statistics, by setting +the `report_stats` key in your config file to either True or False. """ MISSING_REPORT_STATS_SPIEL = """\ We would really appreciate it if you could help our project out by reporting -anonymized usage statistics from your homeserver. Only very basic aggregate -data (e.g. number of users) will be reported, but it helps us to track the -growth of the Matrix community, and helps us to make Matrix a success, as well -as to convince other networks that they should peer with us. +homeserver usage statistics from your homeserver. Your homeserver's server name, +along with very basic aggregate data (e.g. number of users) will be reported. But +it helps us to track the growth of the Matrix community, and helps us to make Matrix +a success, as well as to convince other networks that they should peer with us. Thank you. 
""" @@ -621,7 +621,7 @@ class RootConfig: generate_group.add_argument( "--report-stats", action="store", - help="Whether the generated config reports anonymized usage statistics.", + help="Whether the generated config reports homeserver usage statistics.", choices=["yes", "no"], ) generate_group.add_argument( From 097afd0e0b49099cae2f5c03abc15f3a66894405 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 19 Jul 2022 14:43:28 +0200 Subject: [PATCH 155/178] 1.63.0 --- CHANGES.md | 9 +++++++++ changelog.d/13321.doc | 1 - debian/changelog | 5 +++-- pyproject.toml | 2 +- 4 files changed, 13 insertions(+), 4 deletions(-) delete mode 100644 changelog.d/13321.doc diff --git a/CHANGES.md b/CHANGES.md index a1f918986f..c8da19676c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,12 @@ +Synapse 1.63.0 (2022-07-19) +=========================== + +Improved Documentation +---------------------- + +- Clarify that homeserver server names are included in the reported data when the `report_stats` config option is enabled. ([\#13321](https://github.com/matrix-org/synapse/issues/13321)) + + Synapse 1.63.0rc1 (2022-07-12) ============================== diff --git a/changelog.d/13321.doc b/changelog.d/13321.doc deleted file mode 100644 index a7469e9242..0000000000 --- a/changelog.d/13321.doc +++ /dev/null @@ -1 +0,0 @@ -Clarify that homeserver server names are included in the reported data when the `report_stats` config option is enabled. diff --git a/debian/changelog b/debian/changelog index a9dbe34705..b1635bc392 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,9 +1,10 @@ -matrix-synapse-py3 (1.63.0~rc1+nmu1) UNRELEASED; urgency=medium +matrix-synapse-py3 (1.63.0) stable; urgency=medium * Clarify that homeserver server names are included in the data reported by opt-in server stats reporting (`report_stats` homeserver config option). + * New Synapse release 1.63.0. - -- Synapse Packaging team Tue, 19 Jul 2022 12:00:14 +0100 + -- Synapse Packaging team Tue, 19 Jul 2022 14:42:24 +0200 matrix-synapse-py3 (1.63.0~rc1) stable; urgency=medium diff --git a/pyproject.toml b/pyproject.toml index f77c02ca27..74a473bd34 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ skip_gitignore = true [tool.poetry] name = "matrix-synapse" -version = "1.63.0rc1" +version = "1.63.0" description = "Homeserver for the Matrix decentralised comms protocol" authors = ["Matrix.org Team and Contributors "] license = "Apache-2.0" From 6fccd72f42ac76b185cc20ed258792ff94ee81fb Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 19 Jul 2022 14:53:12 +0200 Subject: [PATCH 156/178] Improve precision on validation improvements --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index c8da19676c..7eac6131b1 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -15,7 +15,7 @@ Features - Add a rate limit for local users sending invites. ([\#13125](https://github.com/matrix-org/synapse/issues/13125)) - Implement [MSC3827](https://github.com/matrix-org/matrix-spec-proposals/pull/3827): Filtering of `/publicRooms` by room type. ([\#13031](https://github.com/matrix-org/synapse/issues/13031)) -- Improve validation logic in Synapse's REST endpoints. ([\#13148](https://github.com/matrix-org/synapse/issues/13148)) +- Improve validation logic in the account data REST endpoints. 
([\#13148](https://github.com/matrix-org/synapse/issues/13148)) Bugfixes From 1efe6b8c41082fca68a6c408a854b25991814d65 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 19 Jul 2022 09:08:46 -0400 Subject: [PATCH 157/178] Stop building Ubuntu 21.10 (Impish Indri) which is end of life. (#13326) --- changelog.d/13326.removal | 1 + scripts-dev/build_debian_packages.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 changelog.d/13326.removal diff --git a/changelog.d/13326.removal b/changelog.d/13326.removal new file mode 100644 index 0000000000..8112286671 --- /dev/null +++ b/changelog.d/13326.removal @@ -0,0 +1 @@ +Stop builindg `.deb` packages for Ubuntu 21.10 (Impish Indri), which has reached end of life. diff --git a/scripts-dev/build_debian_packages.py b/scripts-dev/build_debian_packages.py index 38564893e9..cd2e64b75f 100755 --- a/scripts-dev/build_debian_packages.py +++ b/scripts-dev/build_debian_packages.py @@ -26,7 +26,6 @@ DISTS = ( "debian:bookworm", "debian:sid", "ubuntu:focal", # 20.04 LTS (our EOL forced by Py38 on 2024-10-14) - "ubuntu:impish", # 21.10 (EOL 2022-07) "ubuntu:jammy", # 22.04 LTS (EOL 2027-04) ) From de70b25e844035c26a1bc40fbf6c8982e5d37b45 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 19 Jul 2022 14:40:37 +0100 Subject: [PATCH 158/178] Reduce memory usage of state group cache (#13323) --- changelog.d/13323.misc | 1 + synapse/storage/databases/state/bg_updates.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13323.misc diff --git a/changelog.d/13323.misc b/changelog.d/13323.misc new file mode 100644 index 0000000000..3caa94a2f6 --- /dev/null +++ b/changelog.d/13323.misc @@ -0,0 +1 @@ +Reduce memory usage of state caches. diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py index fa9eadaca7..a7fcc564a9 100644 --- a/synapse/storage/databases/state/bg_updates.py +++ b/synapse/storage/databases/state/bg_updates.py @@ -24,6 +24,7 @@ from synapse.storage.database import ( from synapse.storage.engines import PostgresEngine from synapse.storage.state import StateFilter from synapse.types import MutableStateMap, StateMap +from synapse.util.caches import intern_string if TYPE_CHECKING: from synapse.server import HomeServer @@ -136,7 +137,7 @@ class StateGroupBackgroundUpdateStore(SQLBaseStore): txn.execute(sql % (where_clause,), args) for row in txn: typ, state_key, event_id = row - key = (typ, state_key) + key = (intern_string(typ), intern_string(state_key)) results[group][key] = event_id else: max_entries_returned = state_filter.max_entries_returned() From a6895dd576f96d7fd086fb4128d48ac8a3f098c5 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 19 Jul 2022 14:14:30 -0400 Subject: [PATCH 159/178] Add type annotations to `trace` decorator. (#13328) Functions that are decorated with `trace` are now properly typed and the type hints for them are fixed. 
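In practice this means the decorated function keeps its original signature for type checkers, and custom operation names now go through the separate `trace_with_opname` decorator rather than an argument to `trace`. A minimal usage sketch (the decorated functions here are illustrative, not part of the patch):

```python
from synapse.logging.opentracing import trace, trace_with_opname


@trace
def fetch_something() -> None:
    # The span's operation name defaults to the function's name,
    # i.e. "fetch_something".
    ...


@trace_with_opname("a_better_operation_name")
def interesting_badly_named_function() -> None:
    # The span's operation name is the string passed to trace_with_opname.
    ...
```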
--- changelog.d/13328.misc | 1 + synapse/federation/federation_client.py | 2 +- synapse/federation/transport/client.py | 2 +- synapse/handlers/e2e_keys.py | 16 +++--- synapse/logging/opentracing.py | 50 +++++++++++-------- synapse/replication/http/_base.py | 4 +- synapse/rest/client/keys.py | 4 +- synapse/rest/client/room_keys.py | 13 +++-- synapse/rest/client/sendtodevice.py | 4 +- synapse/rest/client/sync.py | 12 ++--- synapse/storage/databases/main/devices.py | 2 +- .../storage/databases/main/end_to_end_keys.py | 47 ++++++++++++++--- 12 files changed, 102 insertions(+), 55 deletions(-) create mode 100644 changelog.d/13328.misc diff --git a/changelog.d/13328.misc b/changelog.d/13328.misc new file mode 100644 index 0000000000..d15fb5fc37 --- /dev/null +++ b/changelog.d/13328.misc @@ -0,0 +1 @@ +Add type hints to `trace` decorator. diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 66e6305562..7c450ecad0 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -217,7 +217,7 @@ class FederationClient(FederationBase): ) async def claim_client_keys( - self, destination: str, content: JsonDict, timeout: int + self, destination: str, content: JsonDict, timeout: Optional[int] ) -> JsonDict: """Claims one-time keys for a device hosted on a remote server. diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 9e84bd677e..32074b8ca6 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -619,7 +619,7 @@ class TransportLayerClient: ) async def claim_client_keys( - self, destination: str, query_content: JsonDict, timeout: int + self, destination: str, query_content: JsonDict, timeout: Optional[int] ) -> JsonDict: """Claim one-time keys for a list of devices hosted on a remote server. diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 52bb5c9c55..84c28c480e 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -15,7 +15,7 @@ # limitations under the License. import logging -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Mapping, Optional, Tuple import attr from canonicaljson import encode_canonical_json @@ -92,7 +92,11 @@ class E2eKeysHandler: @trace async def query_devices( - self, query_body: JsonDict, timeout: int, from_user_id: str, from_device_id: str + self, + query_body: JsonDict, + timeout: int, + from_user_id: str, + from_device_id: Optional[str], ) -> JsonDict: """Handle a device key query from a client @@ -120,9 +124,7 @@ class E2eKeysHandler: the number of in-flight queries at a time. """ async with self._query_devices_linearizer.queue((from_user_id, from_device_id)): - device_keys_query: Dict[str, Iterable[str]] = query_body.get( - "device_keys", {} - ) + device_keys_query: Dict[str, List[str]] = query_body.get("device_keys", {}) # separate users by domain. 
# make a map from domain to user_id to device_ids @@ -392,7 +394,7 @@ class E2eKeysHandler: @trace async def query_local_devices( - self, query: Dict[str, Optional[List[str]]] + self, query: Mapping[str, Optional[List[str]]] ) -> Dict[str, Dict[str, dict]]: """Get E2E device keys for local users @@ -461,7 +463,7 @@ class E2eKeysHandler: @trace async def claim_one_time_keys( - self, query: Dict[str, Dict[str, Dict[str, str]]], timeout: int + self, query: Dict[str, Dict[str, Dict[str, str]]], timeout: Optional[int] ) -> JsonDict: local_query: List[Tuple[str, str, str]] = [] remote_queries: Dict[str, Dict[str, Dict[str, str]]] = {} diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 50c57940f9..17e729f0c7 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -84,14 +84,13 @@ the function becomes the operation name for the span. return something_usual_and_useful -Operation names can be explicitly set for a function by passing the -operation name to ``trace`` +Operation names can be explicitly set for a function by using ``trace_with_opname``: .. code-block:: python - from synapse.logging.opentracing import trace + from synapse.logging.opentracing import trace_with_opname - @trace(opname="a_better_operation_name") + @trace_with_opname("a_better_operation_name") def interesting_badly_named_function(*args, **kwargs): # Does all kinds of cool and expected things return something_usual_and_useful @@ -798,33 +797,31 @@ def extract_text_map(carrier: Dict[str, str]) -> Optional["opentracing.SpanConte # Tracing decorators -def trace(func=None, opname: Optional[str] = None): +def trace_with_opname(opname: str) -> Callable[[Callable[P, R]], Callable[P, R]]: """ - Decorator to trace a function. - Sets the operation name to that of the function's or that given - as operation_name. See the module's doc string for usage - examples. + Decorator to trace a function with a custom opname. + + See the module's doc string for usage examples. + """ - def decorator(func): + def decorator(func: Callable[P, R]) -> Callable[P, R]: if opentracing is None: return func # type: ignore[unreachable] - _opname = opname if opname else func.__name__ - if inspect.iscoroutinefunction(func): @wraps(func) - async def _trace_inner(*args, **kwargs): - with start_active_span(_opname): - return await func(*args, **kwargs) + async def _trace_inner(*args: P.args, **kwargs: P.kwargs) -> R: + with start_active_span(opname): + return await func(*args, **kwargs) # type: ignore[misc] else: # The other case here handles both sync functions and those # decorated with inlineDeferred. @wraps(func) - def _trace_inner(*args, **kwargs): - scope = start_active_span(_opname) + def _trace_inner(*args: P.args, **kwargs: P.kwargs) -> R: + scope = start_active_span(opname) scope.__enter__() try: @@ -858,12 +855,21 @@ def trace(func=None, opname: Optional[str] = None): scope.__exit__(type(e), None, e.__traceback__) raise - return _trace_inner + return _trace_inner # type: ignore[return-value] - if func: - return decorator(func) - else: - return decorator + return decorator + + +def trace(func: Callable[P, R]) -> Callable[P, R]: + """ + Decorator to trace a function. + + Sets the operation name to that of the function's name. + + See the module's doc string for usage examples. 
+ """ + + return trace_with_opname(func.__name__)(func) def tag_args(func: Callable[P, R]) -> Callable[P, R]: diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index a4ae4040c3..561ad5bf04 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -29,7 +29,7 @@ from synapse.http import RequestTimedOutError from synapse.http.server import HttpServer, is_method_cancellable from synapse.http.site import SynapseRequest from synapse.logging import opentracing -from synapse.logging.opentracing import trace +from synapse.logging.opentracing import trace_with_opname from synapse.types import JsonDict from synapse.util.caches.response_cache import ResponseCache from synapse.util.stringutils import random_string @@ -196,7 +196,7 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta): "ascii" ) - @trace(opname="outgoing_replication_request") + @trace_with_opname("outgoing_replication_request") async def send_request(*, instance_name: str = "master", **kwargs: Any) -> Any: with outgoing_gauge.track_inprogress(): if instance_name == local_instance_name: diff --git a/synapse/rest/client/keys.py b/synapse/rest/client/keys.py index ce806e3c11..eb1b85721f 100644 --- a/synapse/rest/client/keys.py +++ b/synapse/rest/client/keys.py @@ -26,7 +26,7 @@ from synapse.http.servlet import ( parse_string, ) from synapse.http.site import SynapseRequest -from synapse.logging.opentracing import log_kv, set_tag, trace +from synapse.logging.opentracing import log_kv, set_tag, trace_with_opname from synapse.types import JsonDict, StreamToken from ._base import client_patterns, interactive_auth_handler @@ -71,7 +71,7 @@ class KeyUploadServlet(RestServlet): self.e2e_keys_handler = hs.get_e2e_keys_handler() self.device_handler = hs.get_device_handler() - @trace(opname="upload_keys") + @trace_with_opname("upload_keys") async def on_POST( self, request: SynapseRequest, device_id: Optional[str] ) -> Tuple[int, JsonDict]: diff --git a/synapse/rest/client/room_keys.py b/synapse/rest/client/room_keys.py index 37e39570f6..f7081f638e 100644 --- a/synapse/rest/client/room_keys.py +++ b/synapse/rest/client/room_keys.py @@ -13,7 +13,7 @@ # limitations under the License. import logging -from typing import TYPE_CHECKING, Optional, Tuple +from typing import TYPE_CHECKING, Optional, Tuple, cast from synapse.api.errors import Codes, NotFoundError, SynapseError from synapse.http.server import HttpServer @@ -127,7 +127,7 @@ class RoomKeysServlet(RestServlet): requester = await self.auth.get_user_by_req(request, allow_guest=False) user_id = requester.user.to_string() body = parse_json_object_from_request(request) - version = parse_string(request, "version") + version = parse_string(request, "version", required=True) if session_id: body = {"sessions": {session_id: body}} @@ -196,8 +196,11 @@ class RoomKeysServlet(RestServlet): user_id = requester.user.to_string() version = parse_string(request, "version", required=True) - room_keys = await self.e2e_room_keys_handler.get_room_keys( - user_id, version, room_id, session_id + room_keys = cast( + JsonDict, + await self.e2e_room_keys_handler.get_room_keys( + user_id, version, room_id, session_id + ), ) # Convert room_keys to the right format to return. 
@@ -240,7 +243,7 @@ class RoomKeysServlet(RestServlet): requester = await self.auth.get_user_by_req(request, allow_guest=False) user_id = requester.user.to_string() - version = parse_string(request, "version") + version = parse_string(request, "version", required=True) ret = await self.e2e_room_keys_handler.delete_room_keys( user_id, version, room_id, session_id diff --git a/synapse/rest/client/sendtodevice.py b/synapse/rest/client/sendtodevice.py index 3322c8ef48..1a8e9a96d4 100644 --- a/synapse/rest/client/sendtodevice.py +++ b/synapse/rest/client/sendtodevice.py @@ -19,7 +19,7 @@ from synapse.http import servlet from synapse.http.server import HttpServer from synapse.http.servlet import assert_params_in_dict, parse_json_object_from_request from synapse.http.site import SynapseRequest -from synapse.logging.opentracing import set_tag, trace +from synapse.logging.opentracing import set_tag, trace_with_opname from synapse.rest.client.transactions import HttpTransactionCache from synapse.types import JsonDict @@ -43,7 +43,7 @@ class SendToDeviceRestServlet(servlet.RestServlet): self.txns = HttpTransactionCache(hs) self.device_message_handler = hs.get_device_message_handler() - @trace(opname="sendToDevice") + @trace_with_opname("sendToDevice") def on_PUT( self, request: SynapseRequest, message_type: str, txn_id: str ) -> Awaitable[Tuple[int, JsonDict]]: diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index 8bbf35148d..c2989765ce 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -37,7 +37,7 @@ from synapse.handlers.sync import ( from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_boolean, parse_integer, parse_string from synapse.http.site import SynapseRequest -from synapse.logging.opentracing import trace +from synapse.logging.opentracing import trace_with_opname from synapse.types import JsonDict, StreamToken from synapse.util import json_decoder @@ -210,7 +210,7 @@ class SyncRestServlet(RestServlet): logger.debug("Event formatting complete") return 200, response_content - @trace(opname="sync.encode_response") + @trace_with_opname("sync.encode_response") async def encode_response( self, time_now: int, @@ -315,7 +315,7 @@ class SyncRestServlet(RestServlet): ] } - @trace(opname="sync.encode_joined") + @trace_with_opname("sync.encode_joined") async def encode_joined( self, rooms: List[JoinedSyncResult], @@ -340,7 +340,7 @@ class SyncRestServlet(RestServlet): return joined - @trace(opname="sync.encode_invited") + @trace_with_opname("sync.encode_invited") async def encode_invited( self, rooms: List[InvitedSyncResult], @@ -371,7 +371,7 @@ class SyncRestServlet(RestServlet): return invited - @trace(opname="sync.encode_knocked") + @trace_with_opname("sync.encode_knocked") async def encode_knocked( self, rooms: List[KnockedSyncResult], @@ -420,7 +420,7 @@ class SyncRestServlet(RestServlet): return knocked - @trace(opname="sync.encode_archived") + @trace_with_opname("sync.encode_archived") async def encode_archived( self, rooms: List[ArchivedSyncResult], diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index adde5d0978..7a6ed332aa 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -669,7 +669,7 @@ class DeviceWorkerStore(EndToEndKeyWorkerStore): @trace async def get_user_devices_from_cache( - self, query_list: List[Tuple[str, str]] + self, query_list: List[Tuple[str, Optional[str]]] ) -> Tuple[Set[str], 
Dict[str, Dict[str, JsonDict]]]: """Get the devices (and keys if any) for remote users from the cache. diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index 9b293475c8..60f622ad71 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -22,11 +22,14 @@ from typing import ( List, Optional, Tuple, + Union, cast, + overload, ) import attr from canonicaljson import encode_canonical_json +from typing_extensions import Literal from synapse.api.constants import DeviceKeyAlgorithms from synapse.appservice import ( @@ -113,7 +116,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker user_devices = devices[user_id] results = [] for device_id, device in user_devices.items(): - result = {"device_id": device_id} + result: JsonDict = {"device_id": device_id} keys = device.keys if keys: @@ -156,6 +159,9 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker rv[user_id] = {} for device_id, device_info in device_keys.items(): r = device_info.keys + if r is None: + continue + r["unsigned"] = {} display_name = device_info.display_name if display_name is not None: @@ -164,13 +170,42 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker return rv + @overload + async def get_e2e_device_keys_and_signatures( + self, + query_list: Collection[Tuple[str, Optional[str]]], + include_all_devices: Literal[False] = False, + ) -> Dict[str, Dict[str, DeviceKeyLookupResult]]: + ... + + @overload + async def get_e2e_device_keys_and_signatures( + self, + query_list: Collection[Tuple[str, Optional[str]]], + include_all_devices: bool = False, + include_deleted_devices: Literal[False] = False, + ) -> Dict[str, Dict[str, DeviceKeyLookupResult]]: + ... + + @overload + async def get_e2e_device_keys_and_signatures( + self, + query_list: Collection[Tuple[str, Optional[str]]], + include_all_devices: Literal[True], + include_deleted_devices: Literal[True], + ) -> Dict[str, Dict[str, Optional[DeviceKeyLookupResult]]]: + ... 
+ @trace async def get_e2e_device_keys_and_signatures( self, - query_list: List[Tuple[str, Optional[str]]], + query_list: Collection[Tuple[str, Optional[str]]], include_all_devices: bool = False, include_deleted_devices: bool = False, - ) -> Dict[str, Dict[str, Optional[DeviceKeyLookupResult]]]: + ) -> Union[ + Dict[str, Dict[str, DeviceKeyLookupResult]], + Dict[str, Dict[str, Optional[DeviceKeyLookupResult]]], + ]: """Fetch a list of device keys Any cross-signatures made on the keys by the owner of the device are also @@ -1044,7 +1079,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker _claim_e2e_one_time_key = _claim_e2e_one_time_key_simple db_autocommit = False - row = await self.db_pool.runInteraction( + claim_row = await self.db_pool.runInteraction( "claim_e2e_one_time_keys", _claim_e2e_one_time_key, user_id, @@ -1052,11 +1087,11 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker algorithm, db_autocommit=db_autocommit, ) - if row: + if claim_row: device_results = results.setdefault(user_id, {}).setdefault( device_id, {} ) - device_results[row[0]] = row[1] + device_results[claim_row[0]] = claim_row[1] continue # No one-time key available, so see if there's a fallback From 172ce29b149afb91bbf728b88bffb117172a8f2c Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Tue, 19 Jul 2022 19:15:54 +0100 Subject: [PATCH 160/178] Fix spurious warning when fetching state after a missing prev event (#13258) --- changelog.d/13258.misc | 1 + synapse/handlers/federation_event.py | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 changelog.d/13258.misc diff --git a/changelog.d/13258.misc b/changelog.d/13258.misc new file mode 100644 index 0000000000..a187c46aa6 --- /dev/null +++ b/changelog.d/13258.misc @@ -0,0 +1 @@ +Fix spurious warning when fetching state after a missing prev event. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 766d9849f5..e4a5b64d10 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1037,6 +1037,9 @@ class FederationEventHandler: # XXX: this doesn't sound right? it means that we'll end up with incomplete # state. failed_to_fetch = desired_events - event_metadata.keys() + # `event_id` could be missing from `event_metadata` because it's not necessarily + # a state event. We've already checked that we've fetched it above. + failed_to_fetch.discard(event_id) if failed_to_fetch: logger.warning( "Failed to fetch missing state events for %s %s", From b4ae3b0d445a5e294d4d3278b62b2add75840424 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Jul 2022 12:06:13 +0100 Subject: [PATCH 161/178] Don't include appservice users when calculating push rules (#13332) This can cause a lot of extra load on servers with lots of appservice users. Introduced in #13078 --- changelog.d/13332.bugfix | 1 + synapse/push/bulk_push_rule_evaluator.py | 7 ++ tests/push/test_push_rule_evaluator.py | 85 ++++++++++++++++++++++++ 3 files changed, 93 insertions(+) create mode 100644 changelog.d/13332.bugfix diff --git a/changelog.d/13332.bugfix b/changelog.d/13332.bugfix new file mode 100644 index 0000000000..826ed1788f --- /dev/null +++ b/changelog.d/13332.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.63.0 where push actions were incorrectly calculated for appservice users. This caused performance issues on servers with large numbers of appservices. 
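The fix is a simple filter: after looking up the local members of the room, users that belong to an application service are dropped before push rules are evaluated for them. A rough sketch of the idea (the helper name is illustrative; the two store methods are the ones used in the change below):

```python
from typing import List


async def local_users_to_evaluate(store, room_id: str) -> List[str]:
    # All local users currently joined to the room.
    local_users = await store.get_local_users_in_room(room_id)

    # Drop users owned by an application service: calculating push actions
    # for them is unnecessary work and was the source of the slowdown.
    return [
        user_id
        for user_id in local_users
        if not store.get_if_app_services_interested_in_user(user_id)
    ]
```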
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index e581af9a9a..713dcf6950 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -131,6 +131,13 @@ class BulkPushRuleEvaluator: local_users = await self.store.get_local_users_in_room(event.room_id) + # Filter out appservice users. + local_users = [ + u + for u in local_users + if not self.store.get_if_app_services_interested_in_user(u) + ] + # if this event is an invite event, we may need to run rules for the user # who's been invited, otherwise they won't get told they've been invited if event.type == EventTypes.Member and event.membership == Membership.INVITE: diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py index 9b623d0033..718f489577 100644 --- a/tests/push/test_push_rule_evaluator.py +++ b/tests/push/test_push_rule_evaluator.py @@ -16,13 +16,23 @@ from typing import Dict, Optional, Set, Tuple, Union import frozendict +from twisted.test.proto_helpers import MemoryReactor + +import synapse.rest.admin +from synapse.api.constants import EventTypes, Membership from synapse.api.room_versions import RoomVersions +from synapse.appservice import ApplicationService from synapse.events import FrozenEvent from synapse.push import push_rule_evaluator from synapse.push.push_rule_evaluator import PushRuleEvaluatorForEvent +from synapse.rest.client import login, register, room +from synapse.server import HomeServer +from synapse.storage.databases.main.appservice import _make_exclusive_regex from synapse.types import JsonDict +from synapse.util import Clock from tests import unittest +from tests.test_utils.event_injection import create_event, inject_member_event class PushRuleEvaluatorTestCase(unittest.TestCase): @@ -354,3 +364,78 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): "event_type": "*.reaction", } self.assertTrue(evaluator.matches(condition, "@user:test", "foo")) + + +class TestBulkPushRuleEvaluator(unittest.HomeserverTestCase): + """Tests for the bulk push rule evaluator""" + + servlets = [ + synapse.rest.admin.register_servlets_for_client_rest_resource, + login.register_servlets, + register.register_servlets, + room.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer): + # Define an application service so that we can register appservice users + self._service_token = "some_token" + self._service = ApplicationService( + self._service_token, + "as1", + "@as.sender:test", + namespaces={ + "users": [ + {"regex": "@_as_.*:test", "exclusive": True}, + {"regex": "@as.sender:test", "exclusive": True}, + ] + }, + msc3202_transaction_extensions=True, + ) + self.hs.get_datastores().main.services_cache = [self._service] + self.hs.get_datastores().main.exclusive_user_regex = _make_exclusive_regex( + [self._service] + ) + + self._as_user, _ = self.register_appservice_user( + "_as_user", self._service_token + ) + + self.evaluator = self.hs.get_bulk_push_rule_evaluator() + + def test_ignore_appservice_users(self) -> None: + "Test that we don't generate push for appservice users" + + user_id = self.register_user("user", "pass") + token = self.login("user", "pass") + + room_id = self.helper.create_room_as(user_id, tok=token) + self.get_success( + inject_member_event(self.hs, room_id, self._as_user, Membership.JOIN) + ) + + event, context = self.get_success( + create_event( + self.hs, + type=EventTypes.Message, + room_id=room_id, + sender=user_id, + 
content={"body": "test", "msgtype": "m.text"}, + ) + ) + + # Assert the returned push rules do not contain the app service user + rules = self.get_success(self.evaluator._get_rules_for_event(event)) + self.assertTrue(self._as_user not in rules) + + # Assert that no push actions have been added to the staging table (the + # sender should not be pushed for the event) + users_with_push_actions = self.get_success( + self.hs.get_datastores().main.db_pool.simple_select_onecol( + table="event_push_actions_staging", + keyvalues={"event_id": event.event_id}, + retcol="user_id", + desc="test_ignore_appservice_users", + ) + ) + + self.assertEqual(len(users_with_push_actions), 0) From 93740cae5773536c77c142f84c09267c92866ead Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Jul 2022 13:37:00 +0100 Subject: [PATCH 162/178] 1.63.1 --- CHANGES.md | 9 +++++++++ changelog.d/13332.bugfix | 1 - debian/changelog | 6 ++++++ pyproject.toml | 2 +- 4 files changed, 16 insertions(+), 2 deletions(-) delete mode 100644 changelog.d/13332.bugfix diff --git a/CHANGES.md b/CHANGES.md index 7eac6131b1..143ab4e0e5 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,12 @@ +Synapse 1.63.1 (2022-07-20) +=========================== + +Bugfixes +-------- + +- Fix a bug introduced in Synapse 1.63.0 where push actions were incorrectly calculated for appservice users. This caused performance issues on servers with large numbers of appservices. ([\#13332](https://github.com/matrix-org/synapse/issues/13332)) + + Synapse 1.63.0 (2022-07-19) =========================== diff --git a/changelog.d/13332.bugfix b/changelog.d/13332.bugfix deleted file mode 100644 index 826ed1788f..0000000000 --- a/changelog.d/13332.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a bug introduced in Synapse 1.63.0 where push actions were incorrectly calculated for appservice users. This caused performance issues on servers with large numbers of appservices. diff --git a/debian/changelog b/debian/changelog index b1635bc392..9417f8714f 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.63.1) stable; urgency=medium + + * New Synapse release 1.63.1. + + -- Synapse Packaging team Wed, 20 Jul 2022 13:36:52 +0100 + matrix-synapse-py3 (1.63.0) stable; urgency=medium * Clarify that homeserver server names are included in the data reported diff --git a/pyproject.toml b/pyproject.toml index 74a473bd34..9eabe15e23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ skip_gitignore = true [tool.poetry] name = "matrix-synapse" -version = "1.63.0" +version = "1.63.1" description = "Homeserver for the Matrix decentralised comms protocol" authors = ["Matrix.org Team and Contributors "] license = "Apache-2.0" From a1b62af2afc4a5439b7276a02f9fd981fbfd06a4 Mon Sep 17 00:00:00 2001 From: Shay Date: Wed, 20 Jul 2022 11:17:26 -0700 Subject: [PATCH 163/178] Validate federation destinations and log an error if server name is invalid. (#13318) --- changelog.d/13318.misc | 1 + synapse/http/matrixfederationclient.py | 9 +++++++++ tests/federation/test_federation_client.py | 4 ++-- 3 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13318.misc diff --git a/changelog.d/13318.misc b/changelog.d/13318.misc new file mode 100644 index 0000000000..f5cd26b862 --- /dev/null +++ b/changelog.d/13318.misc @@ -0,0 +1 @@ +Validate federation destinations and log an error if a destination is invalid. 
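The validation is a guard at the top of the outbound request path: the destination is parsed as a server name before any request is built, and anything that fails to parse is rejected rather than being passed on to DNS resolution. A condensed sketch of that guard (the standalone helper is illustrative; the exception and parser are the ones used in the change below):

```python
import logging

from synapse.api.errors import FederationDeniedError
from synapse.util.stringutils import parse_and_validate_server_name

logger = logging.getLogger(__name__)


def check_destination(destination: str) -> None:
    # Reject syntactically invalid server names early, logging a traceback
    # so the code path that produced the bad destination can be found.
    try:
        parse_and_validate_server_name(destination)
    except ValueError:
        logger.exception("Invalid destination: %s.", destination)
        raise FederationDeniedError(destination)
```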
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index c63d068f74..3c35b1d2c7 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -79,6 +79,7 @@ from synapse.types import JsonDict from synapse.util import json_decoder from synapse.util.async_helpers import AwakenableSleeper, timeout_deferred from synapse.util.metrics import Measure +from synapse.util.stringutils import parse_and_validate_server_name if TYPE_CHECKING: from synapse.server import HomeServer @@ -479,6 +480,14 @@ class MatrixFederationHttpClient: RequestSendFailed: If there were problems connecting to the remote, due to e.g. DNS failures, connection timeouts etc. """ + # Validate server name and log if it is an invalid destination, this is + # partially to help track down code paths where we haven't validated before here + try: + parse_and_validate_server_name(request.destination) + except ValueError: + logger.exception(f"Invalid destination: {request.destination}.") + raise FederationDeniedError(request.destination) + if timeout: _sec_timeout = timeout / 1000 else: diff --git a/tests/federation/test_federation_client.py b/tests/federation/test_federation_client.py index d2bda07198..cf6b130e4f 100644 --- a/tests/federation/test_federation_client.py +++ b/tests/federation/test_federation_client.py @@ -102,7 +102,7 @@ class FederationClientTest(FederatingHomeserverTestCase): # now fire off the request state_resp, auth_resp = self.get_success( self.hs.get_federation_client().get_room_state( - "yet_another_server", + "yet.another.server", test_room_id, "event_id", RoomVersions.V9, @@ -112,7 +112,7 @@ class FederationClientTest(FederatingHomeserverTestCase): # check the right call got made to the agent self._mock_agent.request.assert_called_once_with( b"GET", - b"matrix://yet_another_server/_matrix/federation/v1/state/%21room_id?event_id=event_id", + b"matrix://yet.another.server/_matrix/federation/v1/state/%21room_id?event_id=event_id", headers=mock.ANY, bodyProducer=None, ) From 0f971ca68e808dd16f53f5594a6b33b7bddcc9a9 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 20 Jul 2022 15:58:51 -0500 Subject: [PATCH 164/178] Update `get_pdu` to return the original, pristine `EventBase` (#13320) Update `get_pdu` to return the untouched, pristine `EventBase` as it was originally seen over federation (no metadata added). Previously, we returned the same `event` reference that we stored in the cache which downstream code modified in place and added metadata like setting it as an `outlier` and essentially poisoned our cache. Now we always return a copy of the `event` so the original can stay pristine in our cache and re-used for the next cache call. Split out from https://github.com/matrix-org/synapse/pull/13205 As discussed at: - https://github.com/matrix-org/synapse/pull/13205#discussion_r918365746 - https://github.com/matrix-org/synapse/pull/13205#discussion_r918366125 Related to https://github.com/matrix-org/synapse/issues/12584. This PR doesn't fix that issue because it hits [`get_event` which exists from the local database before it tries to `get_pdu`](https://github.com/matrix-org/synapse/blob/7864f33e286dec22368dc0b11c06eebb1462a51e/synapse/federation/federation_client.py#L581-L594). 
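The heart of the change is that the cached event is no longer handed out directly: `get_pdu` rebuilds a fresh `EventBase` from the cached event's PDU JSON, so flags that callers set on their copy (such as marking it an outlier) can no longer leak back into the cache. Reduced to its essentials, the copy looks like this (the helper name is illustrative; the call mirrors the change below):

```python
from synapse.events import EventBase, make_event_from_dict


def copy_of_cached_event(event: EventBase) -> EventBase:
    # Returning the cached object directly would let callers mutate it in
    # place (for example by setting event.internal_metadata.outlier = True),
    # poisoning the cache. Rebuild a fresh copy from the event's PDU JSON.
    return make_event_from_dict(
        event.get_pdu_json(),
        event.room_version,
    )
```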
--- changelog.d/13320.misc | 1 + synapse/federation/federation_client.py | 121 +++++++++++++------- synapse/handlers/federation_event.py | 22 +++- synapse/storage/databases/main/events.py | 23 +++- tests/federation/test_federation_client.py | 125 +++++++++++++++++++-- 5 files changed, 232 insertions(+), 60 deletions(-) create mode 100644 changelog.d/13320.misc diff --git a/changelog.d/13320.misc b/changelog.d/13320.misc new file mode 100644 index 0000000000..d33cf3a25a --- /dev/null +++ b/changelog.d/13320.misc @@ -0,0 +1 @@ +Fix `FederationClient.get_pdu()` returning events from the cache as `outliers` instead of original events we saw over federation. diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 7c450ecad0..842f5327c2 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -53,7 +53,7 @@ from synapse.api.room_versions import ( RoomVersion, RoomVersions, ) -from synapse.events import EventBase, builder +from synapse.events import EventBase, builder, make_event_from_dict from synapse.federation.federation_base import ( FederationBase, InvalidEventSignatureError, @@ -299,7 +299,8 @@ class FederationClient(FederationBase): moving to the next destination. None indicates no timeout. Returns: - The requested PDU, or None if we were unable to find it. + A copy of the requested PDU that is safe to modify, or None if we + were unable to find it. Raises: SynapseError, NotRetryingDestination, FederationDeniedError @@ -309,7 +310,7 @@ class FederationClient(FederationBase): ) logger.debug( - "retrieved event id %s from %s: %r", + "get_pdu_from_destination_raw: retrieved event id %s from %s: %r", event_id, destination, transaction_data, @@ -358,54 +359,92 @@ class FederationClient(FederationBase): The requested PDU, or None if we were unable to find it. """ + logger.debug( + "get_pdu: event_id=%s from destinations=%s", event_id, destinations + ) + # TODO: Rate limit the number of times we try and get the same event. - ev = self._get_pdu_cache.get(event_id) - if ev: - return ev + # We might need the same event multiple times in quick succession (before + # it gets persisted to the database), so we cache the results of the lookup. + # Note that this is separate to the regular get_event cache which caches + # events once they have been persisted. 
+ event = self._get_pdu_cache.get(event_id) - pdu_attempts = self.pdu_destination_tried.setdefault(event_id, {}) + # If we don't see the event in the cache, go try to fetch it from the + # provided remote federated destinations + if not event: + pdu_attempts = self.pdu_destination_tried.setdefault(event_id, {}) - signed_pdu = None - for destination in destinations: - now = self._clock.time_msec() - last_attempt = pdu_attempts.get(destination, 0) - if last_attempt + PDU_RETRY_TIME_MS > now: - continue + for destination in destinations: + now = self._clock.time_msec() + last_attempt = pdu_attempts.get(destination, 0) + if last_attempt + PDU_RETRY_TIME_MS > now: + logger.debug( + "get_pdu: skipping destination=%s because we tried it recently last_attempt=%s and we only check every %s (now=%s)", + destination, + last_attempt, + PDU_RETRY_TIME_MS, + now, + ) + continue - try: - signed_pdu = await self.get_pdu_from_destination_raw( - destination=destination, - event_id=event_id, - room_version=room_version, - timeout=timeout, - ) + try: + event = await self.get_pdu_from_destination_raw( + destination=destination, + event_id=event_id, + room_version=room_version, + timeout=timeout, + ) - pdu_attempts[destination] = now + pdu_attempts[destination] = now - except SynapseError as e: - logger.info( - "Failed to get PDU %s from %s because %s", event_id, destination, e - ) - continue - except NotRetryingDestination as e: - logger.info(str(e)) - continue - except FederationDeniedError as e: - logger.info(str(e)) - continue - except Exception as e: - pdu_attempts[destination] = now + if event: + # Prime the cache + self._get_pdu_cache[event.event_id] = event - logger.info( - "Failed to get PDU %s from %s because %s", event_id, destination, e - ) - continue + # FIXME: We should add a `break` here to avoid calling every + # destination after we already found a PDU (will follow-up + # in a separate PR) - if signed_pdu: - self._get_pdu_cache[event_id] = signed_pdu + except SynapseError as e: + logger.info( + "Failed to get PDU %s from %s because %s", + event_id, + destination, + e, + ) + continue + except NotRetryingDestination as e: + logger.info(str(e)) + continue + except FederationDeniedError as e: + logger.info(str(e)) + continue + except Exception as e: + pdu_attempts[destination] = now - return signed_pdu + logger.info( + "Failed to get PDU %s from %s because %s", + event_id, + destination, + e, + ) + continue + + if not event: + return None + + # `event` now refers to an object stored in `get_pdu_cache`. Our + # callers may need to modify the returned object (eg to set + # `event.internal_metadata.outlier = true`), so we return a copy + # rather than the original object. + event_copy = make_event_from_dict( + event.get_pdu_json(), + event.room_version, + ) + + return event_copy async def get_room_state_ids( self, destination: str, room_id: str, event_id: str diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index e4a5b64d10..a5f4ce7c8a 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -766,10 +766,24 @@ class FederationEventHandler: """ logger.info("Processing pulled event %s", event) - # these should not be outliers. 
- assert ( - not event.internal_metadata.is_outlier() - ), "pulled event unexpectedly flagged as outlier" + # This function should not be used to persist outliers (use something + # else) because this does a bunch of operations that aren't necessary + # (extra work; in particular, it makes sure we have all the prev_events + # and resolves the state across those prev events). If you happen to run + # into a situation where the event you're trying to process/backfill is + # marked as an `outlier`, then you should update that spot to return an + # `EventBase` copy that doesn't have `outlier` flag set. + # + # `EventBase` is used to represent both an event we have not yet + # persisted, and one that we have persisted and now keep in the cache. + # In an ideal world this method would only be called with the first type + # of event, but it turns out that's not actually the case and for + # example, you could get an event from cache that is marked as an + # `outlier` (fix up that spot though). + assert not event.internal_metadata.is_outlier(), ( + "Outlier event passed to _process_pulled_event. " + "To persist an event as a non-outlier, make sure to pass in a copy without `event.internal_metadata.outlier = true`." + ) event_id = event.event_id diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 156e1bd5ab..1f600f1190 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1346,9 +1346,24 @@ class PersistEventsStore: event_id: outlier for event_id, outlier in txn } + logger.debug( + "_update_outliers_txn: events=%s have_persisted=%s", + [ev.event_id for ev, _ in events_and_contexts], + have_persisted, + ) + to_remove = set() for event, context in events_and_contexts: - if event.event_id not in have_persisted: + outlier_persisted = have_persisted.get(event.event_id) + logger.debug( + "_update_outliers_txn: event=%s outlier=%s outlier_persisted=%s", + event.event_id, + event.internal_metadata.is_outlier(), + outlier_persisted, + ) + + # Ignore events which we haven't persisted at all + if outlier_persisted is None: continue to_remove.add(event) @@ -1358,7 +1373,6 @@ class PersistEventsStore: # was an outlier or not - what we have is at least as good. continue - outlier_persisted = have_persisted[event.event_id] if not event.internal_metadata.is_outlier() and outlier_persisted: # We received a copy of an event that we had already stored as # an outlier in the database. We now have some state at that event @@ -1369,7 +1383,10 @@ class PersistEventsStore: # events down /sync. In general they will be historical events, so that # doesn't matter too much, but that is not always the case. - logger.info("Updating state for ex-outlier event %s", event.event_id) + logger.info( + "_update_outliers_txn: Updating state for ex-outlier event %s", + event.event_id, + ) # insert into event_to_state_groups. 
try: diff --git a/tests/federation/test_federation_client.py b/tests/federation/test_federation_client.py index cf6b130e4f..50e376f695 100644 --- a/tests/federation/test_federation_client.py +++ b/tests/federation/test_federation_client.py @@ -22,6 +22,7 @@ from twisted.python.failure import Failure from twisted.test.proto_helpers import MemoryReactor from synapse.api.room_versions import RoomVersions +from synapse.events import EventBase from synapse.server import HomeServer from synapse.types import JsonDict from synapse.util import Clock @@ -38,20 +39,24 @@ class FederationClientTest(FederatingHomeserverTestCase): self._mock_agent = mock.create_autospec(twisted.web.client.Agent, spec_set=True) homeserver.get_federation_http_client().agent = self._mock_agent - def test_get_room_state(self): - creator = f"@creator:{self.OTHER_SERVER_NAME}" - test_room_id = "!room_id" + # Move clock up to somewhat realistic time so the PDU destination retry + # works (`now` needs to be larger than `0 + PDU_RETRY_TIME_MS`). + self.reactor.advance(1000000000) + self.creator = f"@creator:{self.OTHER_SERVER_NAME}" + self.test_room_id = "!room_id" + + def test_get_room_state(self): # mock up some events to use in the response. # In real life, these would have things in `prev_events` and `auth_events`, but that's # a bit annoying to mock up, and the code under test doesn't care, so we don't bother. create_event_dict = self.add_hashes_and_signatures_from_other_server( { - "room_id": test_room_id, + "room_id": self.test_room_id, "type": "m.room.create", "state_key": "", - "sender": creator, - "content": {"creator": creator}, + "sender": self.creator, + "content": {"creator": self.creator}, "prev_events": [], "auth_events": [], "origin_server_ts": 500, @@ -59,10 +64,10 @@ class FederationClientTest(FederatingHomeserverTestCase): ) member_event_dict = self.add_hashes_and_signatures_from_other_server( { - "room_id": test_room_id, + "room_id": self.test_room_id, "type": "m.room.member", - "sender": creator, - "state_key": creator, + "sender": self.creator, + "state_key": self.creator, "content": {"membership": "join"}, "prev_events": [], "auth_events": [], @@ -71,9 +76,9 @@ class FederationClientTest(FederatingHomeserverTestCase): ) pl_event_dict = self.add_hashes_and_signatures_from_other_server( { - "room_id": test_room_id, + "room_id": self.test_room_id, "type": "m.room.power_levels", - "sender": creator, + "sender": self.creator, "state_key": "", "content": {}, "prev_events": [], @@ -103,7 +108,7 @@ class FederationClientTest(FederatingHomeserverTestCase): state_resp, auth_resp = self.get_success( self.hs.get_federation_client().get_room_state( "yet.another.server", - test_room_id, + self.test_room_id, "event_id", RoomVersions.V9, ) @@ -130,6 +135,102 @@ class FederationClientTest(FederatingHomeserverTestCase): ["m.room.create", "m.room.member", "m.room.power_levels"], ) + def test_get_pdu_returns_nothing_when_event_does_not_exist(self): + """No event should be returned when the event does not exist""" + remote_pdu = self.get_success( + self.hs.get_federation_client().get_pdu( + ["yet.another.server"], + "event_should_not_exist", + RoomVersions.V9, + ) + ) + self.assertEqual(remote_pdu, None) + + def test_get_pdu(self): + """Test to make sure an event is returned by `get_pdu()`""" + self._get_pdu_once() + + def test_get_pdu_event_from_cache_is_pristine(self): + """Test that modifications made to events returned by `get_pdu()` + do not propagate back to to the internal cache (events returned should + be a copy). 
+ """ + + # Get the PDU in the cache + remote_pdu = self._get_pdu_once() + + # Modify the the event reference. + # This change should not make it back to the `_get_pdu_cache`. + remote_pdu.internal_metadata.outlier = True + + # Get the event again. This time it should read it from cache. + remote_pdu2 = self.get_success( + self.hs.get_federation_client().get_pdu( + ["yet.another.server"], + remote_pdu.event_id, + RoomVersions.V9, + ) + ) + + # Sanity check that we are working against the same event + self.assertEqual(remote_pdu.event_id, remote_pdu2.event_id) + + # Make sure the event does not include modification from earlier + self.assertIsNotNone(remote_pdu2) + self.assertEqual(remote_pdu2.internal_metadata.outlier, False) + + def _get_pdu_once(self) -> EventBase: + """Retrieve an event via `get_pdu()` and assert that an event was returned. + Also used to prime the cache for subsequent test logic. + """ + message_event_dict = self.add_hashes_and_signatures_from_other_server( + { + "room_id": self.test_room_id, + "type": "m.room.message", + "sender": self.creator, + "state_key": "", + "content": {}, + "prev_events": [], + "auth_events": [], + "origin_server_ts": 700, + "depth": 10, + } + ) + + # mock up the response, and have the agent return it + self._mock_agent.request.side_effect = lambda *args, **kwargs: defer.succeed( + _mock_response( + { + "origin": "yet.another.server", + "origin_server_ts": 900, + "pdus": [ + message_event_dict, + ], + } + ) + ) + + remote_pdu = self.get_success( + self.hs.get_federation_client().get_pdu( + ["yet.another.server"], + "event_id", + RoomVersions.V9, + ) + ) + + # check the right call got made to the agent + self._mock_agent.request.assert_called_once_with( + b"GET", + b"matrix://yet.another.server/_matrix/federation/v1/event/event_id", + headers=mock.ANY, + bodyProducer=None, + ) + + self.assertIsNotNone(remote_pdu) + self.assertEqual(remote_pdu.internal_metadata.outlier, False) + + return remote_pdu + def _mock_response(resp: JsonDict): body = json.dumps(resp).encode("utf-8") From b909d5327b06af20505a8c02e34765625f215bfe Mon Sep 17 00:00:00 2001 From: David Teller Date: Wed, 20 Jul 2022 11:04:54 +0200 Subject: [PATCH 165/178] Document `rc_invites.per_issuer`, added in v1.63. Resolves #13330. Missed in #13125. Signed-off-by: David Teller --- changelog.d/13333.doc | 1 + docs/usage/configuration/config_documentation.md | 8 ++++++++ 2 files changed, 9 insertions(+) create mode 100644 changelog.d/13333.doc diff --git a/changelog.d/13333.doc b/changelog.d/13333.doc new file mode 100644 index 0000000000..57cbdf05c8 --- /dev/null +++ b/changelog.d/13333.doc @@ -0,0 +1 @@ +Document the new `rc_invites.per_issuer` throttling option added in Synapse 1.63. \ No newline at end of file diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 6d7d700a13..601fdeb09e 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1504,6 +1504,8 @@ The `rc_invites.per_user` limit applies to the *receiver* of the invite, rather sender, meaning that a `rc_invite.per_user.burst_count` of 5 mandates that a single user cannot *receive* more than a burst of 5 invites at a time. +In contrast, the `rc_invites.per_issuer` limit applies to the *issuer* of the invite, meaning that a `rc_invite.per_issuer.burst_count` of 5 mandates that single user cannot *send* more than a burst of 5 invites at a time. 
+ Example configuration: ```yaml rc_invites: @@ -1513,7 +1515,13 @@ rc_invites: per_user: per_second: 0.004 burst_count: 3 + per_issuer: + per_second: 0.5 + burst_count: 5 ``` + +_Changed in version 1.63:_ added the `per_issuer` limit. + --- ### `rc_third_party_invite` From 190f49d8aba3b18bb9b9c2cd8352dc9b402d6bbf Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Thu, 21 Jul 2022 12:51:30 +0200 Subject: [PATCH 166/178] Use cache store remove base slaved (#13329) This comes from two identical definitions in each of the base stores, and means the base slaved store is now empty and can be removed. --- changelog.d/13329.misc | 1 + synapse/app/admin_cmd.py | 2 - synapse/app/generic_worker.py | 2 - synapse/replication/slave/storage/_base.py | 58 ------------------- .../replication/slave/storage/account_data.py | 3 +- .../replication/slave/storage/deviceinbox.py | 3 +- synapse/replication/slave/storage/devices.py | 3 +- .../replication/slave/storage/directory.py | 4 +- synapse/replication/slave/storage/events.py | 3 - .../replication/slave/storage/filtering.py | 5 +- synapse/replication/slave/storage/profile.py | 3 +- synapse/replication/slave/storage/pushers.py | 3 +- synapse/replication/slave/storage/receipts.py | 4 +- .../replication/slave/storage/registration.py | 4 +- synapse/storage/databases/main/__init__.py | 29 +--------- synapse/storage/databases/main/cache.py | 26 +++++++++ 16 files changed, 39 insertions(+), 114 deletions(-) create mode 100644 changelog.d/13329.misc delete mode 100644 synapse/replication/slave/storage/_base.py diff --git a/changelog.d/13329.misc b/changelog.d/13329.misc new file mode 100644 index 0000000000..4df9a9f6d7 --- /dev/null +++ b/changelog.d/13329.misc @@ -0,0 +1 @@ +Remove old base slaved store and de-duplicate cache ID generators. Contributed by Nick @ Beeper (@fizzadar). 
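Schematically, this refactor moves one duplicated piece of initialisation into a class that both store hierarchies already inherit from, leaving the old slaved base class empty. The classes below are illustrative stand-ins, not the real Synapse stores:

```python
# Before: the same cache-invalidation id generator was set up twice, once in
# the main data store and once in the replication ("slaved") base store.
class MainStoreBefore:
    def __init__(self) -> None:
        self._cache_id_gen = object()  # duplicated setup


class BaseSlavedStoreBefore:
    def __init__(self) -> None:
        self._cache_id_gen = object()  # duplicated setup


# After: the setup lives once in a shared worker store. Both hierarchies mix
# it in, so the slaved base class ends up empty and can be deleted.
class CacheInvalidationWorkerStore:
    def __init__(self) -> None:
        self._cache_id_gen = object()


class MainStoreAfter(CacheInvalidationWorkerStore):
    pass


class SlavedStoreAfter(CacheInvalidationWorkerStore):
    pass


assert hasattr(MainStoreAfter(), "_cache_id_gen")
assert hasattr(SlavedStoreAfter(), "_cache_id_gen")
```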
diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 87f82bd9a5..53ec33bcd1 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -28,7 +28,6 @@ from synapse.config.homeserver import HomeServerConfig from synapse.config.logger import setup_logging from synapse.events import EventBase from synapse.handlers.admin import ExfiltrationWriter -from synapse.replication.slave.storage._base import BaseSlavedStore from synapse.replication.slave.storage.account_data import SlavedAccountDataStore from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore from synapse.replication.slave.storage.deviceinbox import SlavedDeviceInboxStore @@ -58,7 +57,6 @@ class AdminCmdSlavedStore( SlavedDeviceStore, SlavedPushRuleStore, SlavedEventStore, - BaseSlavedStore, RoomWorkerStore, ): def __init__( diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 4a987fb759..0c16584abc 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -48,7 +48,6 @@ from synapse.http.site import SynapseRequest, SynapseSite from synapse.logging.context import LoggingContext from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource -from synapse.replication.slave.storage._base import BaseSlavedStore from synapse.replication.slave.storage.account_data import SlavedAccountDataStore from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore from synapse.replication.slave.storage.deviceinbox import SlavedDeviceInboxStore @@ -251,7 +250,6 @@ class GenericWorkerSlavedStore( TransactionWorkerStore, LockStore, SessionStore, - BaseSlavedStore, ): # Properties that multiple storage classes define. Tell mypy what the # expected type is. diff --git a/synapse/replication/slave/storage/_base.py b/synapse/replication/slave/storage/_base.py deleted file mode 100644 index 7644146dba..0000000000 --- a/synapse/replication/slave/storage/_base.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -from typing import TYPE_CHECKING, Optional - -from synapse.storage.database import DatabasePool, LoggingDatabaseConnection -from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore -from synapse.storage.engines import PostgresEngine -from synapse.storage.util.id_generators import MultiWriterIdGenerator - -if TYPE_CHECKING: - from synapse.server import HomeServer - -logger = logging.getLogger(__name__) - - -class BaseSlavedStore(CacheInvalidationWorkerStore): - def __init__( - self, - database: DatabasePool, - db_conn: LoggingDatabaseConnection, - hs: "HomeServer", - ): - super().__init__(database, db_conn, hs) - if isinstance(self.database_engine, PostgresEngine): - self._cache_id_gen: Optional[ - MultiWriterIdGenerator - ] = MultiWriterIdGenerator( - db_conn, - database, - stream_name="caches", - instance_name=hs.get_instance_name(), - tables=[ - ( - "cache_invalidation_stream_by_instance", - "instance_name", - "stream_id", - ) - ], - sequence_name="cache_invalidation_stream_seq", - writers=[], - ) - else: - self._cache_id_gen = None - - self.hs = hs diff --git a/synapse/replication/slave/storage/account_data.py b/synapse/replication/slave/storage/account_data.py index ee74ee7d85..57d3237981 100644 --- a/synapse/replication/slave/storage/account_data.py +++ b/synapse/replication/slave/storage/account_data.py @@ -13,10 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.replication.slave.storage._base import BaseSlavedStore from synapse.storage.databases.main.account_data import AccountDataWorkerStore from synapse.storage.databases.main.tags import TagsWorkerStore -class SlavedAccountDataStore(TagsWorkerStore, AccountDataWorkerStore, BaseSlavedStore): +class SlavedAccountDataStore(TagsWorkerStore, AccountDataWorkerStore): pass diff --git a/synapse/replication/slave/storage/deviceinbox.py b/synapse/replication/slave/storage/deviceinbox.py index e940751084..df9e4d8f45 100644 --- a/synapse/replication/slave/storage/deviceinbox.py +++ b/synapse/replication/slave/storage/deviceinbox.py @@ -12,9 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from synapse.replication.slave.storage._base import BaseSlavedStore from synapse.storage.databases.main.deviceinbox import DeviceInboxWorkerStore -class SlavedDeviceInboxStore(DeviceInboxWorkerStore, BaseSlavedStore): +class SlavedDeviceInboxStore(DeviceInboxWorkerStore): pass diff --git a/synapse/replication/slave/storage/devices.py b/synapse/replication/slave/storage/devices.py index a48cc02069..6fcade510a 100644 --- a/synapse/replication/slave/storage/devices.py +++ b/synapse/replication/slave/storage/devices.py @@ -14,7 +14,6 @@ from typing import TYPE_CHECKING, Any, Iterable -from synapse.replication.slave.storage._base import BaseSlavedStore from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker from synapse.replication.tcp.streams._base import DeviceListsStream, UserSignatureStream from synapse.storage.database import DatabasePool, LoggingDatabaseConnection @@ -24,7 +23,7 @@ if TYPE_CHECKING: from synapse.server import HomeServer -class SlavedDeviceStore(DeviceWorkerStore, BaseSlavedStore): +class SlavedDeviceStore(DeviceWorkerStore): def __init__( self, database: DatabasePool, diff --git a/synapse/replication/slave/storage/directory.py b/synapse/replication/slave/storage/directory.py index 71fde0c96c..ca716df3df 100644 --- a/synapse/replication/slave/storage/directory.py +++ b/synapse/replication/slave/storage/directory.py @@ -14,8 +14,6 @@ from synapse.storage.databases.main.directory import DirectoryWorkerStore -from ._base import BaseSlavedStore - -class DirectoryStore(DirectoryWorkerStore, BaseSlavedStore): +class DirectoryStore(DirectoryWorkerStore): pass diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index a72dad7464..fe47778cb1 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -29,8 +29,6 @@ from synapse.storage.databases.main.stream import StreamWorkerStore from synapse.storage.databases.main.user_erasure_store import UserErasureWorkerStore from synapse.util.caches.stream_change_cache import StreamChangeCache -from ._base import BaseSlavedStore - if TYPE_CHECKING: from synapse.server import HomeServer @@ -56,7 +54,6 @@ class SlavedEventStore( EventsWorkerStore, UserErasureWorkerStore, RelationsWorkerStore, - BaseSlavedStore, ): def __init__( self, diff --git a/synapse/replication/slave/storage/filtering.py b/synapse/replication/slave/storage/filtering.py index 4d185e2b56..c52679cd60 100644 --- a/synapse/replication/slave/storage/filtering.py +++ b/synapse/replication/slave/storage/filtering.py @@ -14,16 +14,15 @@ from typing import TYPE_CHECKING +from synapse.storage._base import SQLBaseStore from synapse.storage.database import DatabasePool, LoggingDatabaseConnection from synapse.storage.databases.main.filtering import FilteringStore -from ._base import BaseSlavedStore - if TYPE_CHECKING: from synapse.server import HomeServer -class SlavedFilteringStore(BaseSlavedStore): +class SlavedFilteringStore(SQLBaseStore): def __init__( self, database: DatabasePool, diff --git a/synapse/replication/slave/storage/profile.py b/synapse/replication/slave/storage/profile.py index 99f4a22642..a774a2ff48 100644 --- a/synapse/replication/slave/storage/profile.py +++ b/synapse/replication/slave/storage/profile.py @@ -12,9 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from synapse.replication.slave.storage._base import BaseSlavedStore from synapse.storage.databases.main.profile import ProfileWorkerStore -class SlavedProfileStore(ProfileWorkerStore, BaseSlavedStore): +class SlavedProfileStore(ProfileWorkerStore): pass diff --git a/synapse/replication/slave/storage/pushers.py b/synapse/replication/slave/storage/pushers.py index de642bba71..44ed20e424 100644 --- a/synapse/replication/slave/storage/pushers.py +++ b/synapse/replication/slave/storage/pushers.py @@ -18,14 +18,13 @@ from synapse.replication.tcp.streams import PushersStream from synapse.storage.database import DatabasePool, LoggingDatabaseConnection from synapse.storage.databases.main.pusher import PusherWorkerStore -from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker if TYPE_CHECKING: from synapse.server import HomeServer -class SlavedPusherStore(PusherWorkerStore, BaseSlavedStore): +class SlavedPusherStore(PusherWorkerStore): def __init__( self, database: DatabasePool, diff --git a/synapse/replication/slave/storage/receipts.py b/synapse/replication/slave/storage/receipts.py index 3826b87dec..407862a2b2 100644 --- a/synapse/replication/slave/storage/receipts.py +++ b/synapse/replication/slave/storage/receipts.py @@ -15,8 +15,6 @@ from synapse.storage.databases.main.receipts import ReceiptsWorkerStore -from ._base import BaseSlavedStore - -class SlavedReceiptsStore(ReceiptsWorkerStore, BaseSlavedStore): +class SlavedReceiptsStore(ReceiptsWorkerStore): pass diff --git a/synapse/replication/slave/storage/registration.py b/synapse/replication/slave/storage/registration.py index 5dae35a960..52c593e59d 100644 --- a/synapse/replication/slave/storage/registration.py +++ b/synapse/replication/slave/storage/registration.py @@ -14,8 +14,6 @@ from synapse.storage.databases.main.registration import RegistrationWorkerStore -from ._base import BaseSlavedStore - -class SlavedRegistrationStore(RegistrationWorkerStore, BaseSlavedStore): +class SlavedRegistrationStore(RegistrationWorkerStore): pass diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py index a3d31d3737..4dccbb732a 100644 --- a/synapse/storage/databases/main/__init__.py +++ b/synapse/storage/databases/main/__init__.py @@ -24,9 +24,9 @@ from synapse.storage.database import ( LoggingTransaction, ) from synapse.storage.databases.main.stats import UserSortOrder -from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine +from synapse.storage.engines import BaseDatabaseEngine from synapse.storage.types import Cursor -from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator +from synapse.storage.util.id_generators import StreamIdGenerator from synapse.types import JsonDict, get_domain_from_id from synapse.util.caches.stream_change_cache import StreamChangeCache @@ -149,31 +149,6 @@ class DataStore( ], ) - self._cache_id_gen: Optional[MultiWriterIdGenerator] - if isinstance(self.database_engine, PostgresEngine): - # We set the `writers` to an empty list here as we don't care about - # missing updates over restarts, as we'll not have anything in our - # caches to invalidate. (This reduces the amount of writes to the DB - # that happen). 
- self._cache_id_gen = MultiWriterIdGenerator( - db_conn, - database, - stream_name="caches", - instance_name=hs.get_instance_name(), - tables=[ - ( - "cache_invalidation_stream_by_instance", - "instance_name", - "stream_id", - ) - ], - sequence_name="cache_invalidation_stream_seq", - writers=[], - ) - - else: - self._cache_id_gen = None - super().__init__(database, db_conn, hs) events_max = self._stream_id_gen.get_current_token() diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index 2367ddeea3..12e9a42382 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -32,6 +32,7 @@ from synapse.storage.database import ( LoggingTransaction, ) from synapse.storage.engines import PostgresEngine +from synapse.storage.util.id_generators import MultiWriterIdGenerator from synapse.util.caches.descriptors import _CachedFunction from synapse.util.iterutils import batch_iter @@ -65,6 +66,31 @@ class CacheInvalidationWorkerStore(SQLBaseStore): psql_only=True, # The table is only on postgres DBs. ) + self._cache_id_gen: Optional[MultiWriterIdGenerator] + if isinstance(self.database_engine, PostgresEngine): + # We set the `writers` to an empty list here as we don't care about + # missing updates over restarts, as we'll not have anything in our + # caches to invalidate. (This reduces the amount of writes to the DB + # that happen). + self._cache_id_gen = MultiWriterIdGenerator( + db_conn, + database, + stream_name="caches", + instance_name=hs.get_instance_name(), + tables=[ + ( + "cache_invalidation_stream_by_instance", + "instance_name", + "stream_id", + ) + ], + sequence_name="cache_invalidation_stream_seq", + writers=[], + ) + + else: + self._cache_id_gen = None + async def get_all_updated_caches( self, instance_name: str, last_id: int, current_id: int, limit: int ) -> Tuple[List[Tuple[int, tuple]], int, bool]: From 50122754c8743df5c904e81b634fdfdeea64e795 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 21 Jul 2022 08:01:52 -0400 Subject: [PATCH 167/178] Add missing types to opentracing. (#13345) After this change `synapse.logging` is fully typed. --- changelog.d/13328.misc | 2 +- changelog.d/13345.misc | 1 + mypy.ini | 3 -- synapse/federation/transport/server/_base.py | 2 +- synapse/handlers/device.py | 8 ++-- synapse/handlers/e2e_keys.py | 16 +++---- synapse/handlers/e2e_room_keys.py | 4 +- synapse/logging/opentracing.py | 44 +++++++++++++++---- synapse/metrics/background_process_metrics.py | 2 +- synapse/rest/client/keys.py | 4 +- synapse/storage/databases/main/deviceinbox.py | 2 +- synapse/storage/databases/main/devices.py | 4 +- .../storage/databases/main/end_to_end_keys.py | 6 +-- tests/logging/test_opentracing.py | 30 +++++++++---- 14 files changed, 83 insertions(+), 45 deletions(-) create mode 100644 changelog.d/13345.misc diff --git a/changelog.d/13328.misc b/changelog.d/13328.misc index d15fb5fc37..c80578ce95 100644 --- a/changelog.d/13328.misc +++ b/changelog.d/13328.misc @@ -1 +1 @@ -Add type hints to `trace` decorator. +Add missing type hints to open tracing module. diff --git a/changelog.d/13345.misc b/changelog.d/13345.misc new file mode 100644 index 0000000000..c80578ce95 --- /dev/null +++ b/changelog.d/13345.misc @@ -0,0 +1 @@ +Add missing type hints to open tracing module. 
diff --git a/mypy.ini b/mypy.ini index ea0ab003a8..6add272990 100644 --- a/mypy.ini +++ b/mypy.ini @@ -84,9 +84,6 @@ disallow_untyped_defs = False [mypy-synapse.http.matrixfederationclient] disallow_untyped_defs = False -[mypy-synapse.logging.opentracing] -disallow_untyped_defs = False - [mypy-synapse.metrics._reactor_metrics] disallow_untyped_defs = False # This module imports select.epoll. That exists on Linux, but doesn't on macOS. diff --git a/synapse/federation/transport/server/_base.py b/synapse/federation/transport/server/_base.py index 84100a5a52..bb0f8d6b7b 100644 --- a/synapse/federation/transport/server/_base.py +++ b/synapse/federation/transport/server/_base.py @@ -309,7 +309,7 @@ class BaseFederationServlet: raise # update the active opentracing span with the authenticated entity - set_tag("authenticated_entity", origin) + set_tag("authenticated_entity", str(origin)) # if the origin is authenticated and whitelisted, use its span context # as the parent. diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index c05a170c55..1a8379854c 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -118,8 +118,8 @@ class DeviceWorkerHandler: ips = await self.store.get_last_client_ip_by_device(user_id, device_id) _update_device_from_client_ips(device, ips) - set_tag("device", device) - set_tag("ips", ips) + set_tag("device", str(device)) + set_tag("ips", str(ips)) return device @@ -170,7 +170,7 @@ class DeviceWorkerHandler: """ set_tag("user_id", user_id) - set_tag("from_token", from_token) + set_tag("from_token", str(from_token)) now_room_key = self.store.get_room_max_token() room_ids = await self.store.get_rooms_for_user(user_id) @@ -795,7 +795,7 @@ class DeviceListUpdater: """ set_tag("origin", origin) - set_tag("edu_content", edu_content) + set_tag("edu_content", str(edu_content)) user_id = edu_content.pop("user_id") device_id = edu_content.pop("device_id") stream_id = str(edu_content.pop("stream_id")) # They may come as ints diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 84c28c480e..c938339ddd 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -138,8 +138,8 @@ class E2eKeysHandler: else: remote_queries[user_id] = device_ids - set_tag("local_key_query", local_query) - set_tag("remote_key_query", remote_queries) + set_tag("local_key_query", str(local_query)) + set_tag("remote_key_query", str(remote_queries)) # First get local devices. # A map of destination -> failure response. 
@@ -343,7 +343,7 @@ class E2eKeysHandler: failure = _exception_to_failure(e) failures[destination] = failure set_tag("error", True) - set_tag("reason", failure) + set_tag("reason", str(failure)) return @@ -405,7 +405,7 @@ class E2eKeysHandler: Returns: A map from user_id -> device_id -> device details """ - set_tag("local_query", query) + set_tag("local_query", str(query)) local_query: List[Tuple[str, Optional[str]]] = [] result_dict: Dict[str, Dict[str, dict]] = {} @@ -477,8 +477,8 @@ class E2eKeysHandler: domain = get_domain_from_id(user_id) remote_queries.setdefault(domain, {})[user_id] = one_time_keys - set_tag("local_key_query", local_query) - set_tag("remote_key_query", remote_queries) + set_tag("local_key_query", str(local_query)) + set_tag("remote_key_query", str(remote_queries)) results = await self.store.claim_e2e_one_time_keys(local_query) @@ -508,7 +508,7 @@ class E2eKeysHandler: failure = _exception_to_failure(e) failures[destination] = failure set_tag("error", True) - set_tag("reason", failure) + set_tag("reason", str(failure)) await make_deferred_yieldable( defer.gatherResults( @@ -611,7 +611,7 @@ class E2eKeysHandler: result = await self.store.count_e2e_one_time_keys(user_id, device_id) - set_tag("one_time_key_counts", result) + set_tag("one_time_key_counts", str(result)) return {"one_time_key_counts": result} async def _upload_one_time_keys_for_user( diff --git a/synapse/handlers/e2e_room_keys.py b/synapse/handlers/e2e_room_keys.py index 446f509bdc..28dc08c22a 100644 --- a/synapse/handlers/e2e_room_keys.py +++ b/synapse/handlers/e2e_room_keys.py @@ -14,7 +14,7 @@ # limitations under the License. import logging -from typing import TYPE_CHECKING, Dict, Optional +from typing import TYPE_CHECKING, Dict, Optional, cast from typing_extensions import Literal @@ -97,7 +97,7 @@ class E2eRoomKeysHandler: user_id, version, room_id, session_id ) - log_kv(results) + log_kv(cast(JsonDict, results)) return results @trace diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 17e729f0c7..ad5cbf46a4 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -182,6 +182,8 @@ from typing import ( Type, TypeVar, Union, + cast, + overload, ) import attr @@ -328,6 +330,7 @@ class _Sentinel(enum.Enum): P = ParamSpec("P") R = TypeVar("R") +T = TypeVar("T") def only_if_tracing(func: Callable[P, R]) -> Callable[P, Optional[R]]: @@ -343,22 +346,43 @@ def only_if_tracing(func: Callable[P, R]) -> Callable[P, Optional[R]]: return _only_if_tracing_inner -def ensure_active_span(message: str, ret=None): +@overload +def ensure_active_span( + message: str, +) -> Callable[[Callable[P, R]], Callable[P, Optional[R]]]: + ... + + +@overload +def ensure_active_span( + message: str, ret: T +) -> Callable[[Callable[P, R]], Callable[P, Union[T, R]]]: + ... + + +def ensure_active_span( + message: str, ret: Optional[T] = None +) -> Callable[[Callable[P, R]], Callable[P, Union[Optional[T], R]]]: """Executes the operation only if opentracing is enabled and there is an active span. If there is no active span it logs message at the error level. Args: message: Message which fills in "There was no active span when trying to %s" in the error log if there is no active span and opentracing is enabled. - ret (object): return value if opentracing is None or there is no active span. + ret: return value if opentracing is None or there is no active span. 
- Returns (object): The result of the func or ret if opentracing is disabled or there + Returns: + The result of the func, falling back to ret if opentracing is disabled or there was no active span. """ - def ensure_active_span_inner_1(func): + def ensure_active_span_inner_1( + func: Callable[P, R] + ) -> Callable[P, Union[Optional[T], R]]: @wraps(func) - def ensure_active_span_inner_2(*args, **kwargs): + def ensure_active_span_inner_2( + *args: P.args, **kwargs: P.kwargs + ) -> Union[Optional[T], R]: if not opentracing: return ret @@ -464,7 +488,7 @@ def start_active_span( finish_on_close: bool = True, *, tracer: Optional["opentracing.Tracer"] = None, -): +) -> "opentracing.Scope": """Starts an active opentracing span. Records the start time for the span, and sets it as the "active span" in the @@ -502,7 +526,7 @@ def start_active_span_follows_from( *, inherit_force_tracing: bool = False, tracer: Optional["opentracing.Tracer"] = None, -): +) -> "opentracing.Scope": """Starts an active opentracing span, with additional references to previous spans Args: @@ -717,7 +741,9 @@ def inject_response_headers(response_headers: Headers) -> None: response_headers.addRawHeader("Synapse-Trace-Id", f"{trace_id:x}") -@ensure_active_span("get the active span context as a dict", ret={}) +@ensure_active_span( + "get the active span context as a dict", ret=cast(Dict[str, str], {}) +) def get_active_span_text_map(destination: Optional[str] = None) -> Dict[str, str]: """ Gets a span context as a dict. This can be used instead of manually @@ -886,7 +912,7 @@ def tag_args(func: Callable[P, R]) -> Callable[P, R]: for i, arg in enumerate(argspec.args[1:]): set_tag("ARG_" + arg, args[i]) # type: ignore[index] set_tag("args", args[len(argspec.args) :]) # type: ignore[index] - set_tag("kwargs", kwargs) + set_tag("kwargs", str(kwargs)) return func(*args, **kwargs) return _tag_args_inner diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index eef3462e10..7a1516d3a8 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -235,7 +235,7 @@ def run_as_background_process( f"bgproc.{desc}", tags={SynapseTags.REQUEST_ID: str(context)} ) else: - ctx = nullcontext() + ctx = nullcontext() # type: ignore[assignment] with ctx: return await func(*args, **kwargs) except Exception: diff --git a/synapse/rest/client/keys.py b/synapse/rest/client/keys.py index eb1b85721f..e3f454896a 100644 --- a/synapse/rest/client/keys.py +++ b/synapse/rest/client/keys.py @@ -208,7 +208,9 @@ class KeyChangesServlet(RestServlet): # We want to enforce they do pass us one, but we ignore it and return # changes after the "to" as well as before. - set_tag("to", parse_string(request, "to")) + # + # XXX This does not enforce that "to" is passed. 
+ set_tag("to", str(parse_string(request, "to"))) from_token = await StreamToken.from_string(self.store, from_token_string) diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index 422e0e65ca..73c95ffb6f 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py @@ -436,7 +436,7 @@ class DeviceInboxWorkerStore(SQLBaseStore): (user_id, device_id), None ) - set_tag("last_deleted_stream_id", last_deleted_stream_id) + set_tag("last_deleted_stream_id", str(last_deleted_stream_id)) if last_deleted_stream_id: has_changed = self._device_inbox_stream_cache.has_entity_changed( diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 7a6ed332aa..ca0fe8c4be 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -706,8 +706,8 @@ class DeviceWorkerStore(EndToEndKeyWorkerStore): else: results[user_id] = await self.get_cached_devices_for_user(user_id) - set_tag("in_cache", results) - set_tag("not_in_cache", user_ids_not_in_cache) + set_tag("in_cache", str(results)) + set_tag("not_in_cache", str(user_ids_not_in_cache)) return user_ids_not_in_cache, results diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index 60f622ad71..46c0d06157 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -146,7 +146,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker key data. The key data will be a dict in the same format as the DeviceKeys type returned by POST /_matrix/client/r0/keys/query. """ - set_tag("query_list", query_list) + set_tag("query_list", str(query_list)) if not query_list: return {} @@ -418,7 +418,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker def _add_e2e_one_time_keys(txn: LoggingTransaction) -> None: set_tag("user_id", user_id) set_tag("device_id", device_id) - set_tag("new_keys", new_keys) + set_tag("new_keys", str(new_keys)) # We are protected from race between lookup and insertion due to # a unique constraint. If there is a race of two calls to # `add_e2e_one_time_keys` then they'll conflict and we will only @@ -1161,7 +1161,7 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): set_tag("user_id", user_id) set_tag("device_id", device_id) set_tag("time_now", time_now) - set_tag("device_keys", device_keys) + set_tag("device_keys", str(device_keys)) old_key_json = self.db_pool.simple_select_one_onecol_txn( txn, diff --git a/tests/logging/test_opentracing.py b/tests/logging/test_opentracing.py index 40148d503c..3b14c76d7e 100644 --- a/tests/logging/test_opentracing.py +++ b/tests/logging/test_opentracing.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import cast + from twisted.internet import defer from twisted.test.proto_helpers import MemoryReactorClock @@ -40,6 +42,15 @@ from tests.unittest import TestCase class LogContextScopeManagerTestCase(TestCase): + """ + Test logging contexts and active opentracing spans. + + There's casts throughout this from generic opentracing objects (e.g. + opentracing.Span) to the ones specific to Jaeger since they have additional + properties that these tests depend on. This is safe since the only supported + opentracing backend is Jaeger. 
+ """ + if LogContextScopeManager is None: skip = "Requires opentracing" # type: ignore[unreachable] if jaeger_client is None: @@ -69,7 +80,7 @@ class LogContextScopeManagerTestCase(TestCase): # start_active_span should start and activate a span. scope = start_active_span("span", tracer=self._tracer) - span = scope.span + span = cast(jaeger_client.Span, scope.span) self.assertEqual(self._tracer.active_span, span) self.assertIsNotNone(span.start_time) @@ -91,6 +102,7 @@ class LogContextScopeManagerTestCase(TestCase): with LoggingContext("root context"): with start_active_span("root span", tracer=self._tracer) as root_scope: self.assertEqual(self._tracer.active_span, root_scope.span) + root_context = cast(jaeger_client.SpanContext, root_scope.span.context) scope1 = start_active_span( "child1", @@ -99,9 +111,8 @@ class LogContextScopeManagerTestCase(TestCase): self.assertEqual( self._tracer.active_span, scope1.span, "child1 was not activated" ) - self.assertEqual( - scope1.span.context.parent_id, root_scope.span.context.span_id - ) + context1 = cast(jaeger_client.SpanContext, scope1.span.context) + self.assertEqual(context1.parent_id, root_context.span_id) scope2 = start_active_span_follows_from( "child2", @@ -109,17 +120,18 @@ class LogContextScopeManagerTestCase(TestCase): tracer=self._tracer, ) self.assertEqual(self._tracer.active_span, scope2.span) - self.assertEqual( - scope2.span.context.parent_id, scope1.span.context.span_id - ) + context2 = cast(jaeger_client.SpanContext, scope2.span.context) + self.assertEqual(context2.parent_id, context1.span_id) with scope1, scope2: pass # the root scope should be restored self.assertEqual(self._tracer.active_span, root_scope.span) - self.assertIsNotNone(scope2.span.end_time) - self.assertIsNotNone(scope1.span.end_time) + span2 = cast(jaeger_client.Span, scope2.span) + span1 = cast(jaeger_client.Span, scope1.span) + self.assertIsNotNone(span2.end_time) + self.assertIsNotNone(span1.end_time) self.assertIsNone(self._tracer.active_span) From 34949ead1f1f290710441d40187f7a35534ec1b2 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 21 Jul 2022 13:23:05 +0100 Subject: [PATCH 168/178] Track DB txn times w/ two counters, not histogram (#13342) --- changelog.d/13342.misc | 1 + synapse/storage/database.py | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) create mode 100644 changelog.d/13342.misc diff --git a/changelog.d/13342.misc b/changelog.d/13342.misc new file mode 100644 index 0000000000..ce9c816b9c --- /dev/null +++ b/changelog.d/13342.misc @@ -0,0 +1 @@ +When reporting metrics is enabled, use ~8x less data to describe DB transaction metrics. 
diff --git a/synapse/storage/database.py b/synapse/storage/database.py index ea672ff89e..b394a6658b 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -39,7 +39,7 @@ from typing import ( ) import attr -from prometheus_client import Histogram +from prometheus_client import Counter, Histogram from typing_extensions import Concatenate, Literal, ParamSpec from twisted.enterprise import adbapi @@ -76,7 +76,8 @@ perf_logger = logging.getLogger("synapse.storage.TIME") sql_scheduling_timer = Histogram("synapse_storage_schedule_time", "sec") sql_query_timer = Histogram("synapse_storage_query_time", "sec", ["verb"]) -sql_txn_timer = Histogram("synapse_storage_transaction_time", "sec", ["desc"]) +sql_txn_count = Counter("synapse_storage_transaction_time_count", "sec", ["desc"]) +sql_txn_duration = Counter("synapse_storage_transaction_time_sum", "sec", ["desc"]) # Unique indexes which have been added in background updates. Maps from table name @@ -795,7 +796,8 @@ class DatabasePool: self._current_txn_total_time += duration self._txn_perf_counters.update(desc, duration) - sql_txn_timer.labels(desc).observe(duration) + sql_txn_count.labels(desc).inc(1) + sql_txn_duration.labels(desc).inc(duration) async def runInteraction( self, From 10e40938398b6a462801be7eab9f5e6260649a0a Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 21 Jul 2022 14:29:58 +0200 Subject: [PATCH 169/178] Call out buildkit is required when building test docker images (#13338) Co-authored-by: David Robertson --- changelog.d/13338.doc | 1 + docker/Dockerfile-workers | 1 + docker/README-testing.md | 4 ++++ docker/complement/Dockerfile | 1 + 4 files changed, 7 insertions(+) create mode 100644 changelog.d/13338.doc diff --git a/changelog.d/13338.doc b/changelog.d/13338.doc new file mode 100644 index 0000000000..7acf6d3f34 --- /dev/null +++ b/changelog.d/13338.doc @@ -0,0 +1 @@ +Mention that BuildKit is needed when building Docker images for tests. diff --git a/docker/Dockerfile-workers b/docker/Dockerfile-workers index 0f1570cfb6..84f836ff7b 100644 --- a/docker/Dockerfile-workers +++ b/docker/Dockerfile-workers @@ -1,3 +1,4 @@ +# syntax=docker/dockerfile:1 # Inherit from the official Synapse docker image ARG SYNAPSE_VERSION=latest FROM matrixdotorg/synapse:$SYNAPSE_VERSION diff --git a/docker/README-testing.md b/docker/README-testing.md index 1f0423f09b..21b99963d8 100644 --- a/docker/README-testing.md +++ b/docker/README-testing.md @@ -22,6 +22,10 @@ Consult the [contributing guide][guideComplementSh] for instructions on how to u Under some circumstances, you may wish to build the images manually. The instructions below will lead you to doing that. +Note that these images can only be built using [BuildKit](https://docs.docker.com/develop/develop-images/build_enhancements/), +therefore BuildKit needs to be enabled when calling `docker build`. This can be done by +setting `DOCKER_BUILDKIT=1` in your environment. + Start by building the base Synapse docker image. If you wish to run tests with the latest release of Synapse, instead of your current checkout, you can skip this step. 
From the root of the repository: diff --git a/docker/complement/Dockerfile b/docker/complement/Dockerfile index c5e7984a28..3cfff19f9a 100644 --- a/docker/complement/Dockerfile +++ b/docker/complement/Dockerfile @@ -1,3 +1,4 @@ +# syntax=docker/dockerfile:1 # This dockerfile builds on top of 'docker/Dockerfile-workers' in matrix-org/synapse # by including a built-in postgres instance, as well as setting up the homeserver so # that it is ready for testing via Complement. From 13341dde5a4854588ec89b832aed256524abff73 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Jul 2022 16:02:02 +0100 Subject: [PATCH 170/178] Don't hold onto full state in state cache (#13324) --- changelog.d/13324.misc | 1 + synapse/state/__init__.py | 70 ++++++++++++++++++++++++++++++--------- 2 files changed, 55 insertions(+), 16 deletions(-) create mode 100644 changelog.d/13324.misc diff --git a/changelog.d/13324.misc b/changelog.d/13324.misc new file mode 100644 index 0000000000..30670cf56c --- /dev/null +++ b/changelog.d/13324.misc @@ -0,0 +1 @@ +Reduce the amount of state we store in the `state_cache`. diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index e3faa52cd6..87ccd52f0a 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -14,7 +14,7 @@ # limitations under the License. import heapq import logging -from collections import defaultdict +from collections import ChainMap, defaultdict from typing import ( TYPE_CHECKING, Any, @@ -92,8 +92,11 @@ class _StateCacheEntry: prev_group: Optional[int] = None, delta_ids: Optional[StateMap[str]] = None, ): - if state is None and state_group is None: - raise Exception("Either state or state group must be not None") + if state is None and state_group is None and prev_group is None: + raise Exception("One of state, state_group or prev_group must be not None") + + if prev_group is not None and delta_ids is None: + raise Exception("If prev_group is set so must delta_ids") # A map from (type, state_key) to event_id. # @@ -120,18 +123,48 @@ class _StateCacheEntry: if self._state is not None: return self._state - assert self.state_group is not None + if self.state_group is not None: + return await state_storage.get_state_ids_for_group( + self.state_group, state_filter + ) - return await state_storage.get_state_ids_for_group( - self.state_group, state_filter + assert self.prev_group is not None and self.delta_ids is not None + + prev_state = await state_storage.get_state_ids_for_group( + self.prev_group, state_filter ) - def __len__(self) -> int: - # The len should is used to estimate how large this cache entry is, for - # cache eviction purposes. This is why if `self.state` is None it's fine - # to return 1. + # ChainMap expects MutableMapping, but since we're using it immutably + # its safe to give it immutable maps. + return ChainMap(self.delta_ids, prev_state) # type: ignore[arg-type] - return len(self._state) if self._state else 1 + def set_state_group(self, state_group: int) -> None: + """Update the state group assigned to this state (e.g. after we've + persisted it). + + Note: this will cause the cache entry to drop any stored state. + """ + + self.state_group = state_group + + # We clear out the state as we know longer need to explicitly keep it in + # the `state_cache` (as the store state group cache will do that). + self._state = None + + def __len__(self) -> int: + # The len should be used to estimate how large this cache entry is, for + # cache eviction purposes. 
This is why it's fine to return 1 if we're + # not storing any state. + + length = 0 + + if self._state: + length += len(self._state) + + if self.delta_ids: + length += len(self.delta_ids) + + return length or 1 # Make sure its not 0. class StateHandler: @@ -320,7 +353,7 @@ class StateHandler: current_state_ids=state_ids_before_event, ) ) - entry.state_group = state_group_before_event + entry.set_state_group(state_group_before_event) else: state_group_before_event = entry.state_group @@ -747,7 +780,7 @@ def _make_state_cache_entry( old_state_event_ids = set(state.values()) if new_state_event_ids == old_state_event_ids: # got an exact match. - return _StateCacheEntry(state=new_state, state_group=sg) + return _StateCacheEntry(state=None, state_group=sg) # TODO: We want to create a state group for this set of events, to # increase cache hits, but we need to make sure that it doesn't @@ -769,9 +802,14 @@ def _make_state_cache_entry( prev_group = old_group delta_ids = n_delta_ids - return _StateCacheEntry( - state=new_state, state_group=None, prev_group=prev_group, delta_ids=delta_ids - ) + if prev_group is not None: + # If we have a prev group and deltas then we can drop the new state from + # the cache (to reduce memory usage). + return _StateCacheEntry( + state=None, state_group=None, prev_group=prev_group, delta_ids=delta_ids + ) + else: + return _StateCacheEntry(state=new_state, state_group=None) @attr.s(slots=True, auto_attribs=True) From 0b87eb8e0c8e2dd4a426005dce53dfdd57282475 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Jul 2022 17:13:44 +0100 Subject: [PATCH 171/178] Make DictionaryCache have better expiry properties (#13292) --- changelog.d/13292.misc | 1 + synapse/storage/databases/state/store.py | 9 +- synapse/util/caches/dictionary_cache.py | 218 +++++++++++++++++++---- synapse/util/caches/lrucache.py | 90 +++++++++- synapse/util/caches/treecache.py | 38 ++++ tests/storage/test_state.py | 10 +- tests/util/test_dict_cache.py | 35 +++- 7 files changed, 358 insertions(+), 43 deletions(-) create mode 100644 changelog.d/13292.misc diff --git a/changelog.d/13292.misc b/changelog.d/13292.misc new file mode 100644 index 0000000000..67fec55330 --- /dev/null +++ b/changelog.d/13292.misc @@ -0,0 +1 @@ +Make `DictionaryCache` expire full entries if they haven't been queried in a while, even if specific keys have been queried recently. diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index afbc85ad0c..bb64543c1f 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -202,7 +202,14 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): requests state from the cache, if False we need to query the DB for the missing state. """ - cache_entry = cache.get(group) + # If we are asked explicitly for a subset of keys, we only ask for those + # from the cache. This ensures that the `DictionaryCache` can make + # better decisions about what to cache and what to expire. 
+ dict_keys = None + if not state_filter.has_wildcards(): + dict_keys = state_filter.concrete_types() + + cache_entry = cache.get(group, dict_keys=dict_keys) state_dict_ids = cache_entry.value if cache_entry.full or state_filter.is_full(): diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index d267703df0..fa91479c97 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -14,11 +14,13 @@ import enum import logging import threading -from typing import Any, Dict, Generic, Iterable, Optional, Set, TypeVar +from typing import Any, Dict, Generic, Iterable, Optional, Set, Tuple, TypeVar, Union import attr +from typing_extensions import Literal from synapse.util.caches.lrucache import LruCache +from synapse.util.caches.treecache import TreeCache logger = logging.getLogger(__name__) @@ -33,10 +35,12 @@ DV = TypeVar("DV") # This class can't be generic because it uses slots with attrs. # See: https://github.com/python-attrs/attrs/issues/313 -@attr.s(slots=True, auto_attribs=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class DictionaryEntry: # should be: Generic[DKT, DV]. """Returned when getting an entry from the cache + If `full` is true then `known_absent` will be the empty set. + Attributes: full: Whether the cache has the full or dict or just some keys. If not full then not all requested keys will necessarily be present @@ -53,20 +57,90 @@ class DictionaryEntry: # should be: Generic[DKT, DV]. return len(self.value) +class _FullCacheKey(enum.Enum): + """The key we use to cache the full dict.""" + + KEY = object() + + class _Sentinel(enum.Enum): # defining a sentinel in this way allows mypy to correctly handle the # type of a dictionary lookup. sentinel = object() +class _PerKeyValue(Generic[DV]): + """The cached value of a dictionary key. If `value` is the sentinel, + indicates that the requested key is known to *not* be in the full dict. + """ + + __slots__ = ["value"] + + def __init__(self, value: Union[DV, Literal[_Sentinel.sentinel]]) -> None: + self.value = value + + def __len__(self) -> int: + # We add a `__len__` implementation as we use this class in a cache + # where the values are variable length. + return 1 + + class DictionaryCache(Generic[KT, DKT, DV]): """Caches key -> dictionary lookups, supporting caching partial dicts, i.e. fetching a subset of dictionary keys for a particular key. + + This cache has two levels of key. First there is the "cache key" (of type + `KT`), which maps to a dict. The keys to that dict are the "dict key" (of + type `DKT`). The overall structure is therefore `KT->DKT->DV`. For + example, it might look like: + + { + 1: { 1: "a", 2: "b" }, + 2: { 1: "c" }, + } + + It is possible to look up either individual dict keys, or the *complete* + dict for a given cache key. + + Each dict item, and the complete dict is treated as a separate LRU + entry for the purpose of cache expiry. For example, given: + dict_cache.get(1, None) -> DictionaryEntry({1: "a", 2: "b"}) + dict_cache.get(1, [1]) -> DictionaryEntry({1: "a"}) + dict_cache.get(1, [2]) -> DictionaryEntry({2: "b"}) + + ... then the cache entry for the complete dict will expire first, + followed by the cache entry for the '1' dict key, and finally that + for the '2' dict key. 
""" def __init__(self, name: str, max_entries: int = 1000): - self.cache: LruCache[KT, DictionaryEntry] = LruCache( - max_size=max_entries, cache_name=name, size_callback=len + # We use a single LruCache to store two different types of entries: + # 1. Map from (key, dict_key) -> dict value (or sentinel, indicating + # the key doesn't exist in the dict); and + # 2. Map from (key, _FullCacheKey.KEY) -> full dict. + # + # The former is used when explicit keys of the dictionary are looked up, + # and the latter when the full dictionary is requested. + # + # If when explicit keys are requested and not in the cache, we then look + # to see if we have the full dict and use that if we do. If found in the + # full dict each key is added into the cache. + # + # This set up allows the `LruCache` to prune the full dict entries if + # they haven't been used in a while, even when there have been recent + # queries for subsets of the dict. + # + # Typing: + # * A key of `(KT, DKT)` has a value of `_PerKeyValue` + # * A key of `(KT, _FullCacheKey.KEY)` has a value of `Dict[DKT, DV]` + self.cache: LruCache[ + Tuple[KT, Union[DKT, Literal[_FullCacheKey.KEY]]], + Union[_PerKeyValue, Dict[DKT, DV]], + ] = LruCache( + max_size=max_entries, + cache_name=name, + cache_type=TreeCache, + size_callback=len, ) self.name = name @@ -91,23 +165,83 @@ class DictionaryCache(Generic[KT, DKT, DV]): Args: key dict_keys: If given a set of keys then return only those keys - that exist in the cache. + that exist in the cache. If None then returns the full dict + if it is in the cache. Returns: - DictionaryEntry + DictionaryEntry: If `dict_keys` is not None then `DictionaryEntry` + will contain include the keys that are in the cache. If None then + will either return the full dict if in the cache, or the empty + dict (with `full` set to False) if it isn't. """ - entry = self.cache.get(key, _Sentinel.sentinel) - if entry is not _Sentinel.sentinel: - if dict_keys is None: - return DictionaryEntry( - entry.full, entry.known_absent, dict(entry.value) - ) + if dict_keys is None: + # The caller wants the full set of dictionary keys for this cache key + return self._get_full_dict(key) + + # We are being asked for a subset of keys. + + # First go and check for each requested dict key in the cache, tracking + # which we couldn't find. + values = {} + known_absent = set() + missing = [] + for dict_key in dict_keys: + entry = self.cache.get((key, dict_key), _Sentinel.sentinel) + if entry is _Sentinel.sentinel: + missing.append(dict_key) + continue + + assert isinstance(entry, _PerKeyValue) + + if entry.value is _Sentinel.sentinel: + known_absent.add(dict_key) else: - return DictionaryEntry( - entry.full, - entry.known_absent, - {k: entry.value[k] for k in dict_keys if k in entry.value}, - ) + values[dict_key] = entry.value + + # If we found everything we can return immediately. + if not missing: + return DictionaryEntry(False, known_absent, values) + + # We are missing some keys, so check if we happen to have the full dict in + # the cache. + # + # We don't update the last access time for this cache fetch, as we + # aren't explicitly interested in the full dict and so we don't want + # requests for explicit dict keys to keep the full dict in the cache. + entry = self.cache.get( + (key, _FullCacheKey.KEY), + _Sentinel.sentinel, + update_last_access=False, + ) + if entry is _Sentinel.sentinel: + # Not in the cache, return the subset of keys we found. + return DictionaryEntry(False, known_absent, values) + + # We have the full dict! 
+ assert isinstance(entry, dict) + + for dict_key in missing: + # We explicitly add each dict key to the cache, so that cache hit + # rates and LRU times for each key can be tracked separately. + value = entry.get(dict_key, _Sentinel.sentinel) # type: ignore[arg-type] + self.cache[(key, dict_key)] = _PerKeyValue(value) + + if value is not _Sentinel.sentinel: + values[dict_key] = value + + return DictionaryEntry(True, set(), values) + + def _get_full_dict( + self, + key: KT, + ) -> DictionaryEntry: + """Fetch the full dict for the given key.""" + + # First we check if we have cached the full dict. + entry = self.cache.get((key, _FullCacheKey.KEY), _Sentinel.sentinel) + if entry is not _Sentinel.sentinel: + assert isinstance(entry, dict) + return DictionaryEntry(True, set(), entry) return DictionaryEntry(False, set(), {}) @@ -117,7 +251,13 @@ class DictionaryCache(Generic[KT, DKT, DV]): # Increment the sequence number so that any SELECT statements that # raced with the INSERT don't update the cache (SYN-369) self.sequence += 1 - self.cache.pop(key, None) + + # We want to drop all information about the dict for the given key, so + # we use `del_multi` to delete it all in one go. + # + # We ignore the type error here: `del_multi` accepts a truncated key + # (when the key type is a tuple). + self.cache.del_multi((key,)) # type: ignore[arg-type] def invalidate_all(self) -> None: self.check_thread() @@ -131,7 +271,16 @@ class DictionaryCache(Generic[KT, DKT, DV]): value: Dict[DKT, DV], fetched_keys: Optional[Iterable[DKT]] = None, ) -> None: - """Updates the entry in the cache + """Updates the entry in the cache. + + Note: This does *not* invalidate any existing entries for the `key`. + In particular, if we add an entry for the cached "full dict" with + `fetched_keys=None`, existing entries for individual dict keys are + not invalidated. Likewise, adding entries for individual keys does + not invalidate any cached value for the full dict. + + In other words: if the underlying data is *changed*, the cache must + be explicitly invalidated via `.invalidate()`. Args: sequence @@ -149,20 +298,27 @@ class DictionaryCache(Generic[KT, DKT, DV]): # Only update the cache if the caches sequence number matches the # number that the cache had before the SELECT was started (SYN-369) if fetched_keys is None: - self._insert(key, value, set()) + self.cache[(key, _FullCacheKey.KEY)] = value else: - self._update_or_insert(key, value, fetched_keys) + self._update_subset(key, value, fetched_keys) - def _update_or_insert( - self, key: KT, value: Dict[DKT, DV], known_absent: Iterable[DKT] + def _update_subset( + self, key: KT, value: Dict[DKT, DV], fetched_keys: Iterable[DKT] ) -> None: - # We pop and reinsert as we need to tell the cache the size may have - # changed + """Add the given dictionary values as explicit keys in the cache. - entry: DictionaryEntry = self.cache.pop(key, DictionaryEntry(False, set(), {})) - entry.value.update(value) - entry.known_absent.update(known_absent) - self.cache[key] = entry + Args: + key: top-level cache key + value: The dictionary with all the values that we should cache + fetched_keys: The full set of dict keys that were looked up. Any keys + here not in `value` should be marked as "known absent". 
+ """ - def _insert(self, key: KT, value: Dict[DKT, DV], known_absent: Set[DKT]) -> None: - self.cache[key] = DictionaryEntry(True, known_absent, value) + for dict_key, dict_value in value.items(): + self.cache[(key, dict_key)] = _PerKeyValue(dict_value) + + for dict_key in fetched_keys: + if dict_key in value: + continue + + self.cache[(key, dict_key)] = _PerKeyValue(_Sentinel.sentinel) diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index 31f41fec82..b3bdedb04c 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -25,8 +25,10 @@ from typing import ( Collection, Dict, Generic, + Iterable, List, Optional, + Tuple, Type, TypeVar, Union, @@ -44,7 +46,11 @@ from synapse.metrics.background_process_metrics import wrap_as_background_proces from synapse.metrics.jemalloc import get_jemalloc_stats from synapse.util import Clock, caches from synapse.util.caches import CacheMetric, EvictionReason, register_cache -from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry +from synapse.util.caches.treecache import ( + TreeCache, + iterate_tree_cache_entry, + iterate_tree_cache_items, +) from synapse.util.linked_list import ListNode if TYPE_CHECKING: @@ -537,6 +543,7 @@ class LruCache(Generic[KT, VT]): default: Literal[None] = None, callbacks: Collection[Callable[[], None]] = ..., update_metrics: bool = ..., + update_last_access: bool = ..., ) -> Optional[VT]: ... @@ -546,6 +553,7 @@ class LruCache(Generic[KT, VT]): default: T, callbacks: Collection[Callable[[], None]] = ..., update_metrics: bool = ..., + update_last_access: bool = ..., ) -> Union[T, VT]: ... @@ -555,10 +563,27 @@ class LruCache(Generic[KT, VT]): default: Optional[T] = None, callbacks: Collection[Callable[[], None]] = (), update_metrics: bool = True, + update_last_access: bool = True, ) -> Union[None, T, VT]: + """Look up a key in the cache + + Args: + key + default + callbacks: A collection of callbacks that will fire when the + node is removed from the cache (either due to invalidation + or expiry). + update_metrics: Whether to update the hit rate metrics + update_last_access: Whether to update the last access metrics + on a node if successfully fetched. These metrics are used + to determine when to remove the node from the cache. Set + to False if this fetch should *not* prevent a node from + being expired. + """ node = cache.get(key, None) if node is not None: - move_node_to_front(node) + if update_last_access: + move_node_to_front(node) node.add_callbacks(callbacks) if update_metrics and metrics: metrics.inc_hits() @@ -568,6 +593,65 @@ class LruCache(Generic[KT, VT]): metrics.inc_misses() return default + @overload + def cache_get_multi( + key: tuple, + default: Literal[None] = None, + update_metrics: bool = True, + ) -> Union[None, Iterable[Tuple[KT, VT]]]: + ... + + @overload + def cache_get_multi( + key: tuple, + default: T, + update_metrics: bool = True, + ) -> Union[T, Iterable[Tuple[KT, VT]]]: + ... + + @synchronized + def cache_get_multi( + key: tuple, + default: Optional[T] = None, + update_metrics: bool = True, + ) -> Union[None, T, Iterable[Tuple[KT, VT]]]: + """Returns a generator yielding all entries under the given key. + + Can only be used if backed by a tree cache. 
+ + Example: + + cache = LruCache(10, cache_type=TreeCache) + cache[(1, 1)] = "a" + cache[(1, 2)] = "b" + cache[(2, 1)] = "c" + + items = cache.get_multi((1,)) + assert list(items) == [((1, 1), "a"), ((1, 2), "b")] + + Returns: + Either default if the key doesn't exist, or a generator of the + key/value pairs. + """ + + assert isinstance(cache, TreeCache) + + node = cache.get(key, None) + if node is not None: + if update_metrics and metrics: + metrics.inc_hits() + + # We store entries in the `TreeCache` with values of type `_Node`, + # which we need to unwrap. + return ( + (full_key, lru_node.value) + for full_key, lru_node in iterate_tree_cache_items(key, node) + ) + else: + if update_metrics and metrics: + metrics.inc_misses() + return default + @synchronized def cache_set( key: KT, value: VT, callbacks: Collection[Callable[[], None]] = () @@ -674,6 +758,8 @@ class LruCache(Generic[KT, VT]): self.setdefault = cache_set_default self.pop = cache_pop self.del_multi = cache_del_multi + if cache_type is TreeCache: + self.get_multi = cache_get_multi # `invalidate` is exposed for consistency with DeferredCache, so that it can be # invalidated by the cache invalidation replication stream. self.invalidate = cache_del_multi diff --git a/synapse/util/caches/treecache.py b/synapse/util/caches/treecache.py index e78305f787..c1b8ec0c73 100644 --- a/synapse/util/caches/treecache.py +++ b/synapse/util/caches/treecache.py @@ -64,6 +64,15 @@ class TreeCache: self.size += 1 def get(self, key, default=None): + """When `key` is a full key, fetches the value for the given key (if + any). + + If `key` is only a partial key (i.e. a truncated tuple) then returns a + `TreeCacheNode`, which can be passed to the `iterate_tree_cache_*` + functions to iterate over all entries in the cache with keys that start + with the given partial key. + """ + node = self.root for k in key[:-1]: node = node.get(k, None) @@ -139,3 +148,32 @@ def iterate_tree_cache_entry(d): yield from iterate_tree_cache_entry(value_d) else: yield d + + +def iterate_tree_cache_items(key, value): + """Helper function to iterate over the leaves of a tree, i.e. a dict of that + can contain dicts. + + The provided key is a tuple that will get prepended to the returned keys. + + Example: + + cache = TreeCache() + cache[(1, 1)] = "a" + cache[(1, 2)] = "b" + cache[(2, 1)] = "c" + + tree_node = cache.get((1,)) + + items = iterate_tree_cache_items((1,), tree_node) + assert list(items) == [((1, 1), "a"), ((1, 2), "b")] + + Returns: + A generator yielding key/value pairs. + """ + if isinstance(value, TreeCacheNode): + for sub_key, sub_value in value.items(): + yield from iterate_tree_cache_items((*key, sub_key), sub_value) + else: + # we've reached a leaf of the tree. 
+ yield key, value diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index 8043bdbde2..5564161750 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -369,8 +369,8 @@ class StateStoreTestCase(HomeserverTestCase): state_dict_ids = cache_entry.value self.assertEqual(cache_entry.full, False) - self.assertEqual(cache_entry.known_absent, {(e1.type, e1.state_key)}) - self.assertDictEqual(state_dict_ids, {(e1.type, e1.state_key): e1.event_id}) + self.assertEqual(cache_entry.known_absent, set()) + self.assertDictEqual(state_dict_ids, {}) ############################################ # test that things work with a partial cache @@ -387,7 +387,7 @@ class StateStoreTestCase(HomeserverTestCase): ) self.assertEqual(is_all, False) - self.assertDictEqual({(e1.type, e1.state_key): e1.event_id}, state_dict) + self.assertDictEqual({}, state_dict) room_id = self.room.to_string() (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( @@ -412,7 +412,7 @@ class StateStoreTestCase(HomeserverTestCase): ) self.assertEqual(is_all, False) - self.assertDictEqual({(e1.type, e1.state_key): e1.event_id}, state_dict) + self.assertDictEqual({}, state_dict) (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( self.state_datastore._state_group_members_cache, @@ -443,7 +443,7 @@ class StateStoreTestCase(HomeserverTestCase): ) self.assertEqual(is_all, False) - self.assertDictEqual({(e1.type, e1.state_key): e1.event_id}, state_dict) + self.assertDictEqual({}, state_dict) (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( self.state_datastore._state_group_members_cache, diff --git a/tests/util/test_dict_cache.py b/tests/util/test_dict_cache.py index bee66dee43..e8b6246ab5 100644 --- a/tests/util/test_dict_cache.py +++ b/tests/util/test_dict_cache.py @@ -20,7 +20,7 @@ from tests import unittest class DictCacheTestCase(unittest.TestCase): def setUp(self): - self.cache = DictionaryCache("foobar") + self.cache = DictionaryCache("foobar", max_entries=10) def test_simple_cache_hit_full(self): key = "test_simple_cache_hit_full" @@ -76,13 +76,13 @@ class DictCacheTestCase(unittest.TestCase): seq = self.cache.sequence test_value_1 = {"test": "test_simple_cache_hit_miss_partial"} - self.cache.update(seq, key, test_value_1, fetched_keys=set("test")) + self.cache.update(seq, key, test_value_1, fetched_keys={"test"}) seq = self.cache.sequence test_value_2 = {"test2": "test_simple_cache_hit_miss_partial2"} - self.cache.update(seq, key, test_value_2, fetched_keys=set("test2")) + self.cache.update(seq, key, test_value_2, fetched_keys={"test2"}) - c = self.cache.get(key) + c = self.cache.get(key, dict_keys=["test", "test2"]) self.assertEqual( { "test": "test_simple_cache_hit_miss_partial", @@ -90,3 +90,30 @@ class DictCacheTestCase(unittest.TestCase): }, c.value, ) + self.assertEqual(c.full, False) + + def test_invalidation(self): + """Test that the partial dict and full dicts get invalidated + separately. + """ + key = "some_key" + + seq = self.cache.sequence + # start by populating a "full dict" entry + self.cache.update(seq, key, {"a": "b", "c": "d"}) + + # add a bunch of individual entries, also keeping the individual + # entry for "a" warm. + for i in range(20): + self.cache.get(key, ["a"]) + self.cache.update(seq, f"key{i}", {1: 2}) + + # We should have evicted the full dict... + r = self.cache.get(key) + self.assertFalse(r.full) + self.assertTrue("c" not in r.value) + + # ... 
but kept the "a" entry that we kept querying. + r = self.cache.get(key, dict_keys=["a"]) + self.assertFalse(r.full) + self.assertEqual(r.value, {"a": "b"}) From 86e366a46e13710c013141094c407f185ac3fbe3 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Thu, 21 Jul 2022 19:56:45 +0200 Subject: [PATCH 172/178] Remove old empty/redundant slaved stores. (#13349) --- changelog.d/13349.misc | 1 + synapse/app/admin_cmd.py | 26 +++++---- synapse/app/generic_worker.py | 34 ++++++----- .../replication/slave/storage/account_data.py | 21 ------- .../replication/slave/storage/appservice.py | 25 --------- .../replication/slave/storage/deviceinbox.py | 19 ------- .../replication/slave/storage/directory.py | 19 ------- synapse/replication/slave/storage/profile.py | 19 ------- synapse/replication/slave/storage/receipts.py | 20 ------- .../replication/slave/storage/registration.py | 19 ------- .../slave/storage/test_account_data.py | 42 -------------- .../slave => }/storage/test_receipts.py | 56 +++++++++---------- 12 files changed, 63 insertions(+), 238 deletions(-) create mode 100644 changelog.d/13349.misc delete mode 100644 synapse/replication/slave/storage/account_data.py delete mode 100644 synapse/replication/slave/storage/appservice.py delete mode 100644 synapse/replication/slave/storage/deviceinbox.py delete mode 100644 synapse/replication/slave/storage/directory.py delete mode 100644 synapse/replication/slave/storage/profile.py delete mode 100644 synapse/replication/slave/storage/receipts.py delete mode 100644 synapse/replication/slave/storage/registration.py delete mode 100644 tests/replication/slave/storage/test_account_data.py rename tests/{replication/slave => }/storage/test_receipts.py (84%) diff --git a/changelog.d/13349.misc b/changelog.d/13349.misc new file mode 100644 index 0000000000..4df9a9f6d7 --- /dev/null +++ b/changelog.d/13349.misc @@ -0,0 +1 @@ +Remove old base slaved store and de-duplicate cache ID generators. Contributed by Nick @ Beeper (@fizzadar). 
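Editorial note on the next few file diffs: every `synapse/replication/slave/storage/*` module deleted by this patch held nothing but an empty subclass of an existing `*WorkerStore`, so the worker store compositions can inherit those worker stores directly. The following sketch is not part of the patch; it uses `ReceiptsWorkerStore` purely as a stand-in with a dummy method to show the before/after shape:

```python
class ReceiptsWorkerStore:
    """Stand-in for synapse.storage.databases.main.receipts.ReceiptsWorkerStore."""

    def get_receipts_for_user(self, user_id: str) -> dict:
        # Dummy body; the real store queries the database.
        return {}


# Before this patch: a shim class with no behaviour of its own, living in
# synapse/replication/slave/storage/receipts.py.
class SlavedReceiptsStore(ReceiptsWorkerStore):
    pass


# Before: worker store compositions pulled in the shim...
class OldWorkerStore(SlavedReceiptsStore):
    pass


# ...after: the shim module is deleted and the worker store is mixed in directly.
class NewWorkerStore(ReceiptsWorkerStore):
    pass


# Behaviour is identical either way; only the redundant layer goes away.
assert OldWorkerStore().get_receipts_for_user("@user:example.com") == {}
assert NewWorkerStore().get_receipts_for_user("@user:example.com") == {}
```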
diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 53ec33bcd1..8a583d3ec6 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -28,18 +28,22 @@ from synapse.config.homeserver import HomeServerConfig from synapse.config.logger import setup_logging from synapse.events import EventBase from synapse.handlers.admin import ExfiltrationWriter -from synapse.replication.slave.storage.account_data import SlavedAccountDataStore -from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore -from synapse.replication.slave.storage.deviceinbox import SlavedDeviceInboxStore from synapse.replication.slave.storage.devices import SlavedDeviceStore from synapse.replication.slave.storage.events import SlavedEventStore from synapse.replication.slave.storage.filtering import SlavedFilteringStore from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore -from synapse.replication.slave.storage.receipts import SlavedReceiptsStore -from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.server import HomeServer from synapse.storage.database import DatabasePool, LoggingDatabaseConnection +from synapse.storage.databases.main.account_data import AccountDataWorkerStore +from synapse.storage.databases.main.appservice import ( + ApplicationServiceTransactionWorkerStore, + ApplicationServiceWorkerStore, +) +from synapse.storage.databases.main.deviceinbox import DeviceInboxWorkerStore +from synapse.storage.databases.main.receipts import ReceiptsWorkerStore +from synapse.storage.databases.main.registration import RegistrationWorkerStore from synapse.storage.databases.main.room import RoomWorkerStore +from synapse.storage.databases.main.tags import TagsWorkerStore from synapse.types import StateMap from synapse.util import SYNAPSE_VERSION from synapse.util.logcontext import LoggingContext @@ -48,15 +52,17 @@ logger = logging.getLogger("synapse.app.admin_cmd") class AdminCmdSlavedStore( - SlavedReceiptsStore, - SlavedAccountDataStore, - SlavedApplicationServiceStore, - SlavedRegistrationStore, SlavedFilteringStore, - SlavedDeviceInboxStore, SlavedDeviceStore, SlavedPushRuleStore, SlavedEventStore, + TagsWorkerStore, + DeviceInboxWorkerStore, + AccountDataWorkerStore, + ApplicationServiceTransactionWorkerStore, + ApplicationServiceWorkerStore, + RegistrationWorkerStore, + ReceiptsWorkerStore, RoomWorkerStore, ): def __init__( diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 0c16584abc..42d1f6d219 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -48,19 +48,12 @@ from synapse.http.site import SynapseRequest, SynapseSite from synapse.logging.context import LoggingContext from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource -from synapse.replication.slave.storage.account_data import SlavedAccountDataStore -from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore -from synapse.replication.slave.storage.deviceinbox import SlavedDeviceInboxStore from synapse.replication.slave.storage.devices import SlavedDeviceStore -from synapse.replication.slave.storage.directory import DirectoryStore from synapse.replication.slave.storage.events import SlavedEventStore from synapse.replication.slave.storage.filtering import SlavedFilteringStore from synapse.replication.slave.storage.keys import SlavedKeyStore -from 
synapse.replication.slave.storage.profile import SlavedProfileStore from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore from synapse.replication.slave.storage.pushers import SlavedPusherStore -from synapse.replication.slave.storage.receipts import SlavedReceiptsStore -from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.rest.admin import register_servlets_for_media_repo from synapse.rest.client import ( account_data, @@ -99,8 +92,15 @@ from synapse.rest.key.v2 import KeyApiV2Resource from synapse.rest.synapse.client import build_synapse_client_resource_tree from synapse.rest.well_known import well_known_resource from synapse.server import HomeServer +from synapse.storage.databases.main.account_data import AccountDataWorkerStore +from synapse.storage.databases.main.appservice import ( + ApplicationServiceTransactionWorkerStore, + ApplicationServiceWorkerStore, +) from synapse.storage.databases.main.censor_events import CensorEventsStore from synapse.storage.databases.main.client_ips import ClientIpWorkerStore +from synapse.storage.databases.main.deviceinbox import DeviceInboxWorkerStore +from synapse.storage.databases.main.directory import DirectoryWorkerStore from synapse.storage.databases.main.e2e_room_keys import EndToEndRoomKeyStore from synapse.storage.databases.main.lock import LockStore from synapse.storage.databases.main.media_repository import MediaRepositoryStore @@ -109,11 +109,15 @@ from synapse.storage.databases.main.monthly_active_users import ( MonthlyActiveUsersWorkerStore, ) from synapse.storage.databases.main.presence import PresenceStore +from synapse.storage.databases.main.profile import ProfileWorkerStore +from synapse.storage.databases.main.receipts import ReceiptsWorkerStore +from synapse.storage.databases.main.registration import RegistrationWorkerStore from synapse.storage.databases.main.room import RoomWorkerStore from synapse.storage.databases.main.room_batch import RoomBatchStore from synapse.storage.databases.main.search import SearchStore from synapse.storage.databases.main.session import SessionStore from synapse.storage.databases.main.stats import StatsStore +from synapse.storage.databases.main.tags import TagsWorkerStore from synapse.storage.databases.main.transactions import TransactionWorkerStore from synapse.storage.databases.main.ui_auth import UIAuthWorkerStore from synapse.storage.databases.main.user_directory import UserDirectoryStore @@ -226,11 +230,11 @@ class GenericWorkerSlavedStore( UIAuthWorkerStore, EndToEndRoomKeyStore, PresenceStore, - SlavedDeviceInboxStore, + DeviceInboxWorkerStore, SlavedDeviceStore, - SlavedReceiptsStore, SlavedPushRuleStore, - SlavedAccountDataStore, + TagsWorkerStore, + AccountDataWorkerStore, SlavedPusherStore, CensorEventsStore, ClientIpWorkerStore, @@ -238,14 +242,16 @@ class GenericWorkerSlavedStore( SlavedKeyStore, RoomWorkerStore, RoomBatchStore, - DirectoryStore, - SlavedApplicationServiceStore, - SlavedRegistrationStore, - SlavedProfileStore, + DirectoryWorkerStore, + ApplicationServiceTransactionWorkerStore, + ApplicationServiceWorkerStore, + ProfileWorkerStore, SlavedFilteringStore, MonthlyActiveUsersWorkerStore, MediaRepositoryStore, ServerMetricsStore, + ReceiptsWorkerStore, + RegistrationWorkerStore, SearchStore, TransactionWorkerStore, LockStore, diff --git a/synapse/replication/slave/storage/account_data.py b/synapse/replication/slave/storage/account_data.py deleted file mode 100644 index 57d3237981..0000000000 --- 
a/synapse/replication/slave/storage/account_data.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright 2016 OpenMarket Ltd -# Copyright 2018 New Vector Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from synapse.storage.databases.main.account_data import AccountDataWorkerStore -from synapse.storage.databases.main.tags import TagsWorkerStore - - -class SlavedAccountDataStore(TagsWorkerStore, AccountDataWorkerStore): - pass diff --git a/synapse/replication/slave/storage/appservice.py b/synapse/replication/slave/storage/appservice.py deleted file mode 100644 index 29f50c0add..0000000000 --- a/synapse/replication/slave/storage/appservice.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright 2015, 2016 OpenMarket Ltd -# Copyright 2018 New Vector Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from synapse.storage.databases.main.appservice import ( - ApplicationServiceTransactionWorkerStore, - ApplicationServiceWorkerStore, -) - - -class SlavedApplicationServiceStore( - ApplicationServiceTransactionWorkerStore, ApplicationServiceWorkerStore -): - pass diff --git a/synapse/replication/slave/storage/deviceinbox.py b/synapse/replication/slave/storage/deviceinbox.py deleted file mode 100644 index df9e4d8f45..0000000000 --- a/synapse/replication/slave/storage/deviceinbox.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from synapse.storage.databases.main.deviceinbox import DeviceInboxWorkerStore - - -class SlavedDeviceInboxStore(DeviceInboxWorkerStore): - pass diff --git a/synapse/replication/slave/storage/directory.py b/synapse/replication/slave/storage/directory.py deleted file mode 100644 index ca716df3df..0000000000 --- a/synapse/replication/slave/storage/directory.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright 2015, 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from synapse.storage.databases.main.directory import DirectoryWorkerStore - - -class DirectoryStore(DirectoryWorkerStore): - pass diff --git a/synapse/replication/slave/storage/profile.py b/synapse/replication/slave/storage/profile.py deleted file mode 100644 index a774a2ff48..0000000000 --- a/synapse/replication/slave/storage/profile.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright 2018 New Vector Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from synapse.storage.databases.main.profile import ProfileWorkerStore - - -class SlavedProfileStore(ProfileWorkerStore): - pass diff --git a/synapse/replication/slave/storage/receipts.py b/synapse/replication/slave/storage/receipts.py deleted file mode 100644 index 407862a2b2..0000000000 --- a/synapse/replication/slave/storage/receipts.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright 2016 OpenMarket Ltd -# Copyright 2018 New Vector Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from synapse.storage.databases.main.receipts import ReceiptsWorkerStore - - -class SlavedReceiptsStore(ReceiptsWorkerStore): - pass diff --git a/synapse/replication/slave/storage/registration.py b/synapse/replication/slave/storage/registration.py deleted file mode 100644 index 52c593e59d..0000000000 --- a/synapse/replication/slave/storage/registration.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright 2015, 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from synapse.storage.databases.main.registration import RegistrationWorkerStore - - -class SlavedRegistrationStore(RegistrationWorkerStore): - pass diff --git a/tests/replication/slave/storage/test_account_data.py b/tests/replication/slave/storage/test_account_data.py deleted file mode 100644 index 1524087c43..0000000000 --- a/tests/replication/slave/storage/test_account_data.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from synapse.replication.slave.storage.account_data import SlavedAccountDataStore - -from ._base import BaseSlavedStoreTestCase - -USER_ID = "@feeling:blue" -TYPE = "my.type" - - -class SlavedAccountDataStoreTestCase(BaseSlavedStoreTestCase): - - STORE_TYPE = SlavedAccountDataStore - - def test_user_account_data(self): - self.get_success( - self.master_store.add_account_data_for_user(USER_ID, TYPE, {"a": 1}) - ) - self.replicate() - self.check( - "get_global_account_data_by_type_for_user", [USER_ID, TYPE], {"a": 1} - ) - - self.get_success( - self.master_store.add_account_data_for_user(USER_ID, TYPE, {"a": 2}) - ) - self.replicate() - self.check( - "get_global_account_data_by_type_for_user", [USER_ID, TYPE], {"a": 2} - ) diff --git a/tests/replication/slave/storage/test_receipts.py b/tests/storage/test_receipts.py similarity index 84% rename from tests/replication/slave/storage/test_receipts.py rename to tests/storage/test_receipts.py index 19f57115a1..b1a8f8bba7 100644 --- a/tests/replication/slave/storage/test_receipts.py +++ b/tests/storage/test_receipts.py @@ -13,23 +13,21 @@ # limitations under the License. 
from synapse.api.constants import ReceiptTypes -from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.types import UserID, create_requester from tests.test_utils.event_injection import create_event - -from ._base import BaseSlavedStoreTestCase +from tests.unittest import HomeserverTestCase OTHER_USER_ID = "@other:test" OUR_USER_ID = "@our:test" -class SlavedReceiptTestCase(BaseSlavedStoreTestCase): - - STORE_TYPE = SlavedReceiptsStore - +class ReceiptTestCase(HomeserverTestCase): def prepare(self, reactor, clock, homeserver): super().prepare(reactor, clock, homeserver) + + self.store = homeserver.get_datastores().main + self.room_creator = homeserver.get_room_creation_handler() self.persist_event_storage_controller = ( self.hs.get_storage_controllers().persistence @@ -87,14 +85,14 @@ class SlavedReceiptTestCase(BaseSlavedStoreTestCase): def test_return_empty_with_no_data(self): res = self.get_success( - self.master_store.get_receipts_for_user( + self.store.get_receipts_for_user( OUR_USER_ID, [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE] ) ) self.assertEqual(res, {}) res = self.get_success( - self.master_store.get_receipts_for_user_with_orderings( + self.store.get_receipts_for_user_with_orderings( OUR_USER_ID, [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE], ) @@ -102,7 +100,7 @@ class SlavedReceiptTestCase(BaseSlavedStoreTestCase): self.assertEqual(res, {}) res = self.get_success( - self.master_store.get_last_receipt_event_id_for_user( + self.store.get_last_receipt_event_id_for_user( OUR_USER_ID, self.room_id1, [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE], @@ -121,20 +119,20 @@ class SlavedReceiptTestCase(BaseSlavedStoreTestCase): # Send public read receipt for the first event self.get_success( - self.master_store.insert_receipt( + self.store.insert_receipt( self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_1_id], {} ) ) # Send private read receipt for the second event self.get_success( - self.master_store.insert_receipt( + self.store.insert_receipt( self.room_id1, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event1_2_id], {} ) ) # Test we get the latest event when we want both private and public receipts res = self.get_success( - self.master_store.get_receipts_for_user( + self.store.get_receipts_for_user( OUR_USER_ID, [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE] ) ) @@ -142,26 +140,24 @@ class SlavedReceiptTestCase(BaseSlavedStoreTestCase): # Test we get the older event when we want only public receipt res = self.get_success( - self.master_store.get_receipts_for_user(OUR_USER_ID, [ReceiptTypes.READ]) + self.store.get_receipts_for_user(OUR_USER_ID, [ReceiptTypes.READ]) ) self.assertEqual(res, {self.room_id1: event1_1_id}) # Test we get the latest event when we want only the public receipt res = self.get_success( - self.master_store.get_receipts_for_user( - OUR_USER_ID, [ReceiptTypes.READ_PRIVATE] - ) + self.store.get_receipts_for_user(OUR_USER_ID, [ReceiptTypes.READ_PRIVATE]) ) self.assertEqual(res, {self.room_id1: event1_2_id}) # Test receipt updating self.get_success( - self.master_store.insert_receipt( + self.store.insert_receipt( self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_2_id], {} ) ) res = self.get_success( - self.master_store.get_receipts_for_user(OUR_USER_ID, [ReceiptTypes.READ]) + self.store.get_receipts_for_user(OUR_USER_ID, [ReceiptTypes.READ]) ) self.assertEqual(res, {self.room_id1: event1_2_id}) @@ -172,12 +168,12 @@ class SlavedReceiptTestCase(BaseSlavedStoreTestCase): # Test new room is reflected in what the method returns 
self.get_success( - self.master_store.insert_receipt( + self.store.insert_receipt( self.room_id2, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event2_1_id], {} ) ) res = self.get_success( - self.master_store.get_receipts_for_user( + self.store.get_receipts_for_user( OUR_USER_ID, [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE] ) ) @@ -194,20 +190,20 @@ class SlavedReceiptTestCase(BaseSlavedStoreTestCase): # Send public read receipt for the first event self.get_success( - self.master_store.insert_receipt( + self.store.insert_receipt( self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_1_id], {} ) ) # Send private read receipt for the second event self.get_success( - self.master_store.insert_receipt( + self.store.insert_receipt( self.room_id1, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event1_2_id], {} ) ) # Test we get the latest event when we want both private and public receipts res = self.get_success( - self.master_store.get_last_receipt_event_id_for_user( + self.store.get_last_receipt_event_id_for_user( OUR_USER_ID, self.room_id1, [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE], @@ -217,7 +213,7 @@ class SlavedReceiptTestCase(BaseSlavedStoreTestCase): # Test we get the older event when we want only public receipt res = self.get_success( - self.master_store.get_last_receipt_event_id_for_user( + self.store.get_last_receipt_event_id_for_user( OUR_USER_ID, self.room_id1, [ReceiptTypes.READ] ) ) @@ -225,7 +221,7 @@ class SlavedReceiptTestCase(BaseSlavedStoreTestCase): # Test we get the latest event when we want only the private receipt res = self.get_success( - self.master_store.get_last_receipt_event_id_for_user( + self.store.get_last_receipt_event_id_for_user( OUR_USER_ID, self.room_id1, [ReceiptTypes.READ_PRIVATE] ) ) @@ -233,12 +229,12 @@ class SlavedReceiptTestCase(BaseSlavedStoreTestCase): # Test receipt updating self.get_success( - self.master_store.insert_receipt( + self.store.insert_receipt( self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_2_id], {} ) ) res = self.get_success( - self.master_store.get_last_receipt_event_id_for_user( + self.store.get_last_receipt_event_id_for_user( OUR_USER_ID, self.room_id1, [ReceiptTypes.READ] ) ) @@ -251,12 +247,12 @@ class SlavedReceiptTestCase(BaseSlavedStoreTestCase): # Test new room is reflected in what the method returns self.get_success( - self.master_store.insert_receipt( + self.store.insert_receipt( self.room_id2, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event2_1_id], {} ) ) res = self.get_success( - self.master_store.get_last_receipt_event_id_for_user( + self.store.get_last_receipt_event_id_for_user( OUR_USER_ID, self.room_id2, [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE], From 158782c3ce1eb92e98df50645b03afcab4f22db0 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Fri, 22 Jul 2022 10:13:01 +0100 Subject: [PATCH 173/178] Skip soft fail checks for rooms with partial state (#13354) When a room has the partial state flag, we may not have an accurate `m.room.member` event for event senders in the room's current state, and so cannot perform soft fail checks correctly. Skip the soft fail check entirely in this case. As an alternative, we could block until we have full state, but that would prevent us from receiving incoming events over federation, which is undesirable. 
Signed-off-by: Sean Quah --- changelog.d/13354.misc | 1 + synapse/handlers/federation_event.py | 10 ++++++++++ 2 files changed, 11 insertions(+) create mode 100644 changelog.d/13354.misc diff --git a/changelog.d/13354.misc b/changelog.d/13354.misc new file mode 100644 index 0000000000..e08ee7866a --- /dev/null +++ b/changelog.d/13354.misc @@ -0,0 +1 @@ +Faster room joins: skip soft fail checks while Synapse only has partial room state, since the current membership of event senders may not be accurately known. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index a5f4ce7c8a..9d9f1696f2 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1664,11 +1664,21 @@ class FederationEventHandler: """Checks if we should soft fail the event; if so, marks the event as such. + Does nothing for events in rooms with partial state, since we may not have an + accurate membership event for the sender in the current state. + Args: event state_ids: The state at the event if we don't have all the event's prev events origin: The host the event originates from. """ + if await self._store.is_partial_state_room(event.room_id): + # We might not know the sender's membership in the current state, so don't + # soft fail anything. Even if we do have a membership for the sender in the + # current state, it may have been derived from state resolution between + # partial and full state and may not be accurate. + return + extrem_ids_list = await self._store.get_latest_event_ids_in_room(event.room_id) extrem_ids = set(extrem_ids_list) prev_event_ids = set(event.prev_event_ids()) From 0fa41a7b172cb157c6d6df41e7ae4bf5cbdc0d36 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Fri, 22 Jul 2022 10:26:09 +0100 Subject: [PATCH 174/178] Update locked frozendict version to 2.3.3 (#13352) frozendict 2.3.3 includes fixes for memory leaks that get triggered during `/sync`. --- changelog.d/13352.bugfix | 1 + docs/upgrade.md | 9 +++++++++ poetry.lock | 36 ++++++++++++++++++------------------ 3 files changed, 28 insertions(+), 18 deletions(-) create mode 100644 changelog.d/13352.bugfix diff --git a/changelog.d/13352.bugfix b/changelog.d/13352.bugfix new file mode 100644 index 0000000000..8128714299 --- /dev/null +++ b/changelog.d/13352.bugfix @@ -0,0 +1 @@ +Update locked version of `frozendict` to 2.3.3, which has fixes for memory leaks affecting `/sync`. diff --git a/docs/upgrade.md b/docs/upgrade.md index 2c7c258909..fadb8e7ffb 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -114,6 +114,15 @@ vice versa. Once all workers are upgraded to v1.64 (or downgraded to v1.63), event replication will resume as normal. +## frozendict release + +[frozendict 2.3.3](https://github.com/Marco-Sulla/python-frozendict/releases/tag/v2.3.3) +has recently been released, which fixes a memory leak that occurs during `/sync` +requests. We advise server administrators who installed Synapse via pip to upgrade +frozendict with `pip install --upgrade frozendict`. The Docker image +`matrixdotorg/synapse` and the Debian packages from `packages.matrix.org` already +include the updated library. 
+ # Upgrading to v1.62.0 ## New signatures for spam checker callbacks diff --git a/poetry.lock b/poetry.lock index 41ab40edd1..b62c24ae16 100644 --- a/poetry.lock +++ b/poetry.lock @@ -290,7 +290,7 @@ importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} [[package]] name = "frozendict" -version = "2.3.2" +version = "2.3.3" description = "A simple immutable dictionary" category = "main" optional = false @@ -1753,23 +1753,23 @@ flake8-comprehensions = [ {file = "flake8_comprehensions-3.8.0-py3-none-any.whl", hash = "sha256:9406314803abe1193c064544ab14fdc43c58424c0882f6ff8a581eb73fc9bb58"}, ] frozendict = [ - {file = "frozendict-2.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4fb171d1e84d17335365877e19d17440373b47ca74a73c06f65ac0b16d01e87f"}, - {file = "frozendict-2.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0a3640e9d7533d164160b758351aa49d9e85bbe0bd76d219d4021e90ffa6a52"}, - {file = "frozendict-2.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:87cfd00fafbc147d8cd2590d1109b7db8ac8d7d5bdaa708ba46caee132b55d4d"}, - {file = "frozendict-2.3.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:fb09761e093cfabb2f179dbfdb2521e1ec5701df714d1eb5c51fa7849027be19"}, - {file = "frozendict-2.3.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82176dc7adf01cf8f0193e909401939415a230a1853f4a672ec1629a06ceae18"}, - {file = "frozendict-2.3.2-cp36-cp36m-win_amd64.whl", hash = "sha256:c1c70826aa4a50fa283fe161834ac4a3ac7c753902c980bb8b595b0998a38ddb"}, - {file = "frozendict-2.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:1db5035ddbed995badd1a62c4102b5e207b5aeb24472df2c60aba79639d7996b"}, - {file = "frozendict-2.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4246fc4cb1413645ba4d3513939b90d979a5bae724be605a10b2b26ee12f839c"}, - {file = "frozendict-2.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:680cd42fb0a255da1ce45678ccbd7f69da750d5243809524ebe8f45b2eda6e6b"}, - {file = "frozendict-2.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6a7f3a181d6722c92a9fab12d0c5c2b006a18ca5666098531f316d1e1c8984e3"}, - {file = "frozendict-2.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1cb866eabb3c1384a7fe88e1e1033e2b6623073589012ab637c552bf03f6364"}, - {file = "frozendict-2.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:952c5e5e664578c5c2ce8489ee0ab6a1855da02b58ef593ee728fc10d672641a"}, - {file = "frozendict-2.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:608b77904cd0117cd816df605a80d0043a5326ee62529327d2136c792165a823"}, - {file = "frozendict-2.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0eed41fd326f0bcc779837d8d9e1374da1bc9857fe3b9f2910195bbd5fff3aeb"}, - {file = "frozendict-2.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:bde28db6b5868dd3c45b3555f9d1dc5a1cca6d93591502fa5dcecce0dde6a335"}, - {file = "frozendict-2.3.2-py3-none-any.whl", hash = "sha256:6882a9bbe08ab9b5ff96ce11bdff3fe40b114b9813bc6801261e2a7b45e20012"}, - {file = "frozendict-2.3.2.tar.gz", hash = "sha256:7fac4542f0a13fbe704db4942f41ba3abffec5af8b100025973e59dff6a09d0d"}, + {file = "frozendict-2.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:39942914c1217a5a49c7551495a103b3dbd216e19413687e003b859c6b0ebc12"}, + {file = "frozendict-2.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5589256058b31f2b91419fa30b8dc62dbdefe7710e688a3fd5b43849161eecc9"}, + {file = "frozendict-2.3.3-cp310-cp310-win_amd64.whl", hash = 
"sha256:35eb7e59e287c41f4f712d4d3d2333354175b155d217b97c99c201d2d8920790"}, + {file = "frozendict-2.3.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:310aaf81793abf4f471895e6fe65e0e74a28a2aaf7b25c2ba6ccd4e35af06842"}, + {file = "frozendict-2.3.3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c353c11010a986566a0cb37f9a783c560ffff7d67d5e7fd52221fb03757cdc43"}, + {file = "frozendict-2.3.3-cp36-cp36m-win_amd64.whl", hash = "sha256:15b5f82aad108125336593cec1b6420c638bf45f449c57e50949fc7654ea5a41"}, + {file = "frozendict-2.3.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a4737e5257756bd6b877504ff50185b705db577b5330d53040a6cf6417bb3cdb"}, + {file = "frozendict-2.3.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80a14c11e33e8b0bc09e07bba3732c77a502c39edb8c3959fd9a0e490e031158"}, + {file = "frozendict-2.3.3-cp37-cp37m-win_amd64.whl", hash = "sha256:027952d1698ac9c766ef43711226b178cdd49d2acbdff396936639ad1d2a5615"}, + {file = "frozendict-2.3.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ef818d66c85098a37cf42509545a4ba7dd0c4c679d6262123a8dc14cc474bab7"}, + {file = "frozendict-2.3.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:812279f2b270c980112dc4e367b168054f937108f8044eced4199e0ab2945a37"}, + {file = "frozendict-2.3.3-cp38-cp38-win_amd64.whl", hash = "sha256:c1fb7efbfebc2075f781be3d9774e4ba6ce4fc399148b02097f68d4b3c4bc00a"}, + {file = "frozendict-2.3.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a0b46d4bf95bce843c0151959d54c3e5b8d0ce29cb44794e820b3ec980d63eee"}, + {file = "frozendict-2.3.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38c4660f37fcc70a32ff997fe58e40b3fcc60b2017b286e33828efaa16b01308"}, + {file = "frozendict-2.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:919e3609844fece11ab18bcbf28a3ed20f8108ad4149d7927d413687f281c6c9"}, + {file = "frozendict-2.3.3-py3-none-any.whl", hash = "sha256:f988b482d08972a196664718167a993a61c9e9f6fe7b0ca2443570b5f20ca44a"}, + {file = "frozendict-2.3.3.tar.gz", hash = "sha256:398539c52af3c647d103185bbaa1291679f0507ad035fe3bab2a8b0366d52cf1"}, ] gitdb = [ {file = "gitdb-4.0.9-py3-none-any.whl", hash = "sha256:8033ad4e853066ba6ca92050b9df2f89301b8fc8bf7e9324d412a63f8bf1a8fd"}, From c7c84b81e3ec3d66f3a57a8d6ba3e58dd4c81ecc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 22 Jul 2022 13:50:20 +0100 Subject: [PATCH 175/178] Update config_documentation.md (#13364) "changed in" goes before the example --- docs/usage/configuration/config_documentation.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 11d1574484..a10f6662eb 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1530,6 +1530,8 @@ cannot *receive* more than a burst of 5 invites at a time. In contrast, the `rc_invites.per_issuer` limit applies to the *issuer* of the invite, meaning that a `rc_invite.per_issuer.burst_count` of 5 mandates that single user cannot *send* more than a burst of 5 invites at a time. +_Changed in version 1.63:_ added the `per_issuer` limit. + Example configuration: ```yaml rc_invites: @@ -1544,8 +1546,6 @@ rc_invites: burst_count: 5 ``` -_Changed in version 1.63:_ added the `per_issuer` limit. 
- --- ### `rc_third_party_invite` From 357561c1a2b2da6d1a7ad1e2340217fee18cc2b1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 22 Jul 2022 16:00:11 -0500 Subject: [PATCH 176/178] Backfill remote event fetched by MSC3030 so we can paginate from it later (#13205) Depends on https://github.com/matrix-org/synapse/pull/13320 Complement tests: https://github.com/matrix-org/complement/pull/406 We could use the same method to backfill for `/context` as well in the future, see https://github.com/matrix-org/synapse/issues/3848 --- changelog.d/13205.feature | 1 + synapse/handlers/federation_event.py | 49 ++++++++++++++++++++++- synapse/handlers/room.py | 59 +++++++++++++++++++++------- 3 files changed, 94 insertions(+), 15 deletions(-) create mode 100644 changelog.d/13205.feature diff --git a/changelog.d/13205.feature b/changelog.d/13205.feature new file mode 100644 index 0000000000..d89aa9aa75 --- /dev/null +++ b/changelog.d/13205.feature @@ -0,0 +1 @@ +Allow pagination from remote event after discovering it from MSC3030 `/timestamp_to_event`. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 9d9f1696f2..16f20c8be7 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -793,7 +793,7 @@ class FederationEventHandler: if existing: if not existing.internal_metadata.is_outlier(): logger.info( - "Ignoring received event %s which we have already seen", + "_process_pulled_event: Ignoring received event %s which we have already seen", event_id, ) return @@ -1329,6 +1329,53 @@ class FederationEventHandler: marker_event, ) + async def backfill_event_id( + self, destination: str, room_id: str, event_id: str + ) -> EventBase: + """Backfill a single event and persist it as a non-outlier which means + we also pull in all of the state and auth events necessary for it. + + Args: + destination: The homeserver to pull the given event_id from. + room_id: The room where the event is from. + event_id: The event ID to backfill. + + Raises: + FederationError if we are unable to find the event from the destination + """ + logger.info( + "backfill_event_id: event_id=%s from destination=%s", event_id, destination + ) + + room_version = await self._store.get_room_version(room_id) + + event_from_response = await self._federation_client.get_pdu( + [destination], + event_id, + room_version, + ) + + if not event_from_response: + raise FederationError( + "ERROR", + 404, + "Unable to find event_id=%s from destination=%s to backfill." + % (event_id, destination), + affected=event_id, + ) + + # Persist the event we just fetched, including pulling all of the state + # and auth events to de-outlier it. This also sets up the necessary + # `state_groups` for the event. 
+ await self._process_pulled_events( + destination, + [event_from_response], + # Prevent notifications going to clients + backfilled=True, + ) + + return event_from_response + async def _get_events_and_persist( self, destination: str, room_id: str, event_ids: Collection[str] ) -> None: diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 978d3ee39f..55395457c3 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1384,6 +1384,7 @@ class TimestampLookupHandler: self.store = hs.get_datastores().main self.state_handler = hs.get_state_handler() self.federation_client = hs.get_federation_client() + self.federation_event_handler = hs.get_federation_event_handler() self._storage_controllers = hs.get_storage_controllers() async def get_event_for_timestamp( @@ -1479,38 +1480,68 @@ class TimestampLookupHandler: remote_response, ) - # TODO: Do we want to persist this as an extremity? - # TODO: I think ideally, we would try to backfill from - # this event and run this whole - # `get_event_for_timestamp` function again to make sure - # they didn't give us an event from their gappy history. remote_event_id = remote_response.event_id - origin_server_ts = remote_response.origin_server_ts + remote_origin_server_ts = remote_response.origin_server_ts + + # Backfill this event so we can get a pagination token for + # it with `/context` and paginate `/messages` from this + # point. + # + # TODO: The requested timestamp may lie in a part of the + # event graph that the remote server *also* didn't have, + # in which case they will have returned another event + # which may be nowhere near the requested timestamp. In + # the future, we may need to reconcile that gap and ask + # other homeservers, and/or extend `/timestamp_to_event` + # to return events on *both* sides of the timestamp to + # help reconcile the gap faster. + remote_event = ( + await self.federation_event_handler.backfill_event_id( + domain, room_id, remote_event_id + ) + ) + + # XXX: When we see that the remote server is not trustworthy, + # maybe we should not ask them first in the future. + if remote_origin_server_ts != remote_event.origin_server_ts: + logger.info( + "get_event_for_timestamp: Remote server (%s) claimed that remote_event_id=%s occured at remote_origin_server_ts=%s but that isn't true (actually occured at %s). 
Their claims are dubious and we should consider not trusting them.", + domain, + remote_event_id, + remote_origin_server_ts, + remote_event.origin_server_ts, + ) # Only return the remote event if it's closer than the local event if not local_event or ( - abs(origin_server_ts - timestamp) + abs(remote_event.origin_server_ts - timestamp) < abs(local_event.origin_server_ts - timestamp) ): - return remote_event_id, origin_server_ts + logger.info( + "get_event_for_timestamp: returning remote_event_id=%s (%s) since it's closer to timestamp=%s than local_event=%s (%s)", + remote_event_id, + remote_event.origin_server_ts, + timestamp, + local_event.event_id if local_event else None, + local_event.origin_server_ts if local_event else None, + ) + return remote_event_id, remote_origin_server_ts except (HttpResponseException, InvalidResponseError) as ex: # Let's not put a high priority on some other homeserver # failing to respond or giving a random response logger.debug( - "Failed to fetch /timestamp_to_event from %s because of exception(%s) %s args=%s", + "get_event_for_timestamp: Failed to fetch /timestamp_to_event from %s because of exception(%s) %s args=%s", domain, type(ex).__name__, ex, ex.args, ) - except Exception as ex: + except Exception: # But we do want to see some exceptions in our code logger.warning( - "Failed to fetch /timestamp_to_event from %s because of exception(%s) %s args=%s", + "get_event_for_timestamp: Failed to fetch /timestamp_to_event from %s because of exception", domain, - type(ex).__name__, - ex, - ex.args, + exc_info=True, ) # To appease mypy, we have to add both of these conditions to check for From 43adf2521cc6952dcc7f0e3006dbfe52db85721a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 25 Jul 2022 10:21:06 +0100 Subject: [PATCH 177/178] Refactor presence so we can prune user in room caches (#13313) See #10826 and #10786 for context as to why we had to disable pruning on those caches. Now that `get_users_who_share_room_with_user` is called frequently only for presence, we just need to make calls to it less frequent and then we can remove the various levels of caching that is going on. --- changelog.d/13313.misc | 1 + synapse/handlers/presence.py | 112 ++++++------------- synapse/storage/_base.py | 4 + synapse/storage/databases/main/roommember.py | 83 +++++++++++--- 4 files changed, 109 insertions(+), 91 deletions(-) create mode 100644 changelog.d/13313.misc diff --git a/changelog.d/13313.misc b/changelog.d/13313.misc new file mode 100644 index 0000000000..0f3c1f0afd --- /dev/null +++ b/changelog.d/13313.misc @@ -0,0 +1 @@ +Change `get_users_in_room` and `get_rooms_for_user` caches to enable pruning of old entries. 
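Editorial note before the `presence.py` diff below: a rough sketch of the new interested-user calculation may help. It is plain Python with stand-in arguments rather than the real Synapse store APIs. When the stream change cache can enumerate exactly which users changed, the handler now only asks which of those users share a room with the syncing user (via `do_users_share_a_room`), and it falls back to the expensive `get_users_who_share_room_with_user` only when the delta is unknown.

```python
from typing import AbstractSet, Optional, Set


def users_to_poll(
    explicitly_interested: Set[str],
    updated_users: Optional[AbstractSet[str]],
    sharing_among_updated: Set[str],
    all_sharing_a_room: Set[str],
) -> Set[str]:
    """Sketch of the branching added in PresenceEventSource below.

    `sharing_among_updated` stands in for store.do_users_share_a_room(...)
    and `all_sharing_a_room` for store.get_users_who_share_room_with_user(...).
    """
    if updated_users is not None:
        # Cheap path: intersect the known delta with the users we care about.
        return (sharing_among_updated | explicitly_interested) & set(updated_users)
    # Expensive fallback: everyone we share a room with, plus explicit extras.
    return all_sharing_a_room | explicitly_interested


# Example: only "@b:x" both changed presence and shares a room with us.
assert users_to_poll(
    explicitly_interested={"@me:x"},
    updated_users={"@b:x", "@c:x"},
    sharing_among_updated={"@b:x"},
    all_sharing_a_room={"@a:x", "@b:x"},
) == {"@b:x"}
```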
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index 895ea63ed3..741504ba9f 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -34,7 +34,6 @@ from typing import (
     Callable,
     Collection,
     Dict,
-    FrozenSet,
     Generator,
     Iterable,
     List,
@@ -42,7 +41,6 @@ from typing import (
     Set,
     Tuple,
     Type,
-    Union,
 )

 from prometheus_client import Counter
@@ -68,7 +66,6 @@ from synapse.storage.databases.main import DataStore
 from synapse.streams import EventSource
 from synapse.types import JsonDict, StreamKeyType, UserID, get_domain_from_id
 from synapse.util.async_helpers import Linearizer
-from synapse.util.caches.descriptors import _CacheContext, cached
 from synapse.util.metrics import Measure
 from synapse.util.wheel_timer import WheelTimer

@@ -1656,15 +1653,18 @@ class PresenceEventSource(EventSource[int, UserPresenceState]):
             # doesn't return. C.f. #5503.
             return [], max_token

-        # Figure out which other users this user should receive updates for
-        users_interested_in = await self._get_interested_in(user, explicit_room_id)
+        # Figure out which other users this user should explicitly receive
+        # updates for
+        additional_users_interested_in = (
+            await self.get_presence_router().get_interested_users(user.to_string())
+        )

         # We have a set of users that we're interested in the presence of. We want to
         # cross-reference that with the users that have actually changed their presence.

         # Check whether this user should see all user updates
-        if users_interested_in == PresenceRouter.ALL_USERS:
+        if additional_users_interested_in == PresenceRouter.ALL_USERS:
             # Provide presence state for all users
             presence_updates = await self._filter_all_presence_updates_for_user(
                 user_id, include_offline, from_key
@@ -1673,34 +1673,47 @@ class PresenceEventSource(EventSource[int, UserPresenceState]):

             return presence_updates, max_token

         # Make mypy happy. users_interested_in should now be a set
-        assert not isinstance(users_interested_in, str)
+        assert not isinstance(additional_users_interested_in, str)
+
+        # We always care about our own presence.
+        additional_users_interested_in.add(user_id)
+
+        if explicit_room_id:
+            user_ids = await self.store.get_users_in_room(explicit_room_id)
+            additional_users_interested_in.update(user_ids)

         # The set of users that we're interested in and that have had a presence update.
         # We'll actually pull the presence updates for these users at the end.
-        interested_and_updated_users: Union[Set[str], FrozenSet[str]] = set()
+        interested_and_updated_users: Collection[str]

         if from_key is not None:
             # First get all users that have had a presence update
             updated_users = stream_change_cache.get_all_entities_changed(from_key)

             # Cross-reference users we're interested in with those that have had updates.
-            # Use a slightly-optimised method for processing smaller sets of updates.
-            if updated_users is not None and len(updated_users) < 500:
-                # For small deltas, it's quicker to get all changes and then
-                # cross-reference with the users we're interested in
+            if updated_users is not None:
+                # If we have the full list of changes for presence we can
+                # simply check which ones share a room with the user.
                 get_updates_counter.labels("stream").inc()
-                for other_user_id in updated_users:
-                    if other_user_id in users_interested_in:
-                        # mypy thinks this variable could be a FrozenSet as it's possibly set
-                        # to one in the `get_entities_changed` call below, and `add()` is not
-                        # method on a FrozenSet. That doesn't affect us here though, as
-                        # `interested_and_updated_users` is clearly a set() above.
-                        interested_and_updated_users.add(other_user_id)  # type: ignore
+
+                sharing_users = await self.store.do_users_share_a_room(
+                    user_id, updated_users
+                )
+
+                interested_and_updated_users = (
+                    sharing_users.union(additional_users_interested_in)
+                ).intersection(updated_users)
+
             else:
                 # Too many possible updates. Find all users we can see and check
                 # if any of them have changed.
                 get_updates_counter.labels("full").inc()
+                users_interested_in = (
+                    await self.store.get_users_who_share_room_with_user(user_id)
+                )
+                users_interested_in.update(additional_users_interested_in)
+
                 interested_and_updated_users = (
                     stream_change_cache.get_entities_changed(
                         users_interested_in, from_key
@@ -1709,7 +1722,10 @@ class PresenceEventSource(EventSource[int, UserPresenceState]):
         else:
             # No from_key has been specified. Return the presence for all users
             # this user is interested in
-            interested_and_updated_users = users_interested_in
+            interested_and_updated_users = (
+                await self.store.get_users_who_share_room_with_user(user_id)
+            )
+            interested_and_updated_users.update(additional_users_interested_in)

         # Retrieve the current presence state for each user
         users_to_state = await self.get_presence_handler().current_state_for_users(
@@ -1804,62 +1820,6 @@ class PresenceEventSource(EventSource[int, UserPresenceState]):
     def get_current_key(self) -> int:
         return self.store.get_current_presence_token()

-    @cached(num_args=2, cache_context=True)
-    async def _get_interested_in(
-        self,
-        user: UserID,
-        explicit_room_id: Optional[str] = None,
-        cache_context: Optional[_CacheContext] = None,
-    ) -> Union[Set[str], str]:
-        """Returns the set of users that the given user should see presence
-        updates for.
-
-        Args:
-            user: The user to retrieve presence updates for.
-            explicit_room_id: The users that are in the room will be returned.
-
-        Returns:
-            A set of user IDs to return presence updates for, or "ALL" to return all
-            known updates.
-        """
-        user_id = user.to_string()
-        users_interested_in = set()
-        users_interested_in.add(user_id)  # So that we receive our own presence
-
-        # cache_context isn't likely to ever be None due to the @cached decorator,
-        # but we can't have a non-optional argument after the optional argument
-        # explicit_room_id either. Assert cache_context is not None so we can use it
-        # without mypy complaining.
-        assert cache_context
-
-        # Check with the presence router whether we should poll additional users for
-        # their presence information
-        additional_users = await self.get_presence_router().get_interested_users(
-            user.to_string()
-        )
-        if additional_users == PresenceRouter.ALL_USERS:
-            # If the module requested that this user see the presence updates of *all*
-            # users, then simply return that instead of calculating what rooms this
-            # user shares
-            return PresenceRouter.ALL_USERS
-
-        # Add the additional users from the router
-        users_interested_in.update(additional_users)
-
-        # Find the users who share a room with this user
-        users_who_share_room = await self.store.get_users_who_share_room_with_user(
-            user_id, on_invalidate=cache_context.invalidate
-        )
-        users_interested_in.update(users_who_share_room)
-
-        if explicit_room_id:
-            user_ids = await self.store.get_users_in_room(
-                explicit_room_id, on_invalidate=cache_context.invalidate
-            )
-            users_interested_in.update(user_ids)
-
-        return users_interested_in
-

 def handle_timeouts(
     user_states: List[UserPresenceState],
diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index a2f8310388..e30f9c76d4 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -80,6 +80,10 @@ class SQLBaseStore(metaclass=ABCMeta):
             )
             self._attempt_to_invalidate_cache("get_local_users_in_room", (room_id,))

+            # There's no easy way of invalidating this cache for just the users
+            # that have changed, so we just clear the entire thing.
+            self._attempt_to_invalidate_cache("does_pair_of_users_share_a_room", None)
+
             for user_id in members_changed:
                 self._attempt_to_invalidate_cache(
                     "get_user_in_room_with_profile", (room_id, user_id)
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index df6b82660e..e2cccc688c 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -21,6 +21,7 @@ from typing import (
     FrozenSet,
     Iterable,
     List,
+    Mapping,
     Optional,
     Set,
     Tuple,
@@ -55,6 +56,7 @@ from synapse.types import JsonDict, PersistedEventPosition, StateMap, get_domain
 from synapse.util.async_helpers import Linearizer
 from synapse.util.caches import intern_string
 from synapse.util.caches.descriptors import _CacheContext, cached, cachedList
+from synapse.util.iterutils import batch_iter
 from synapse.util.metrics import Measure

 if TYPE_CHECKING:
@@ -183,7 +185,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
                 self._check_safe_current_state_events_membership_updated_txn,
             )

-    @cached(max_entries=100000, iterable=True, prune_unread_entries=False)
+    @cached(max_entries=100000, iterable=True)
     async def get_users_in_room(self, room_id: str) -> List[str]:
         return await self.db_pool.runInteraction(
             "get_users_in_room", self.get_users_in_room_txn, room_id
@@ -561,7 +563,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):

         return results_dict.get("membership"), results_dict.get("event_id")

-    @cached(max_entries=500000, iterable=True, prune_unread_entries=False)
+    @cached(max_entries=500000, iterable=True)
     async def get_rooms_for_user_with_stream_ordering(
         self, user_id: str
     ) -> FrozenSet[GetRoomsForUserWithStreamOrdering]:
@@ -732,25 +734,76 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         )
         return frozenset(r.room_id for r in rooms)

-    @cached(
-        max_entries=500000,
-        cache_context=True,
-        iterable=True,
-        prune_unread_entries=False,
+    @cached(max_entries=10000)
+    async def does_pair_of_users_share_a_room(
+        self, user_id: str, other_user_id: str
+    ) -> bool:
+        raise NotImplementedError()
+
+    @cachedList(
+        cached_method_name="does_pair_of_users_share_a_room", list_name="other_user_ids"
     )
-    async def get_users_who_share_room_with_user(
-        self, user_id: str, cache_context: _CacheContext
+    async def _do_users_share_a_room(
+        self, user_id: str, other_user_ids: Collection[str]
+    ) -> Mapping[str, Optional[bool]]:
+        """Return mapping from user ID to whether they share a room with the
+        given user.
+
+        Note: `None` and `False` are equivalent and mean they don't share a
+        room.
+        """
+
+        def do_users_share_a_room_txn(
+            txn: LoggingTransaction, user_ids: Collection[str]
+        ) -> Dict[str, bool]:
+            clause, args = make_in_list_sql_clause(
+                self.database_engine, "state_key", user_ids
+            )
+
+            # This query works by fetching both the list of rooms for the target
+            # user and the set of other users, and then checking if there is any
+            # overlap.
+            sql = f"""
+                SELECT b.state_key
+                FROM (
+                    SELECT room_id FROM current_state_events
+                    WHERE type = 'm.room.member' AND membership = 'join' AND state_key = ?
+                ) AS a
+                INNER JOIN (
+                    SELECT room_id, state_key FROM current_state_events
+                    WHERE type = 'm.room.member' AND membership = 'join' AND {clause}
+                ) AS b using (room_id)
+                LIMIT 1
+            """
+
+            txn.execute(sql, (user_id, *args))
+            return {u: True for u, in txn}
+
+        to_return = {}
+        for batch_user_ids in batch_iter(other_user_ids, 1000):
+            res = await self.db_pool.runInteraction(
+                "do_users_share_a_room", do_users_share_a_room_txn, batch_user_ids
+            )
+            to_return.update(res)
+
+        return to_return
+
+    async def do_users_share_a_room(
+        self, user_id: str, other_user_ids: Collection[str]
     ) -> Set[str]:
+        """Return the set of users who share a room with the first users"""
+
+        user_dict = await self._do_users_share_a_room(user_id, other_user_ids)
+
+        return {u for u, share_room in user_dict.items() if share_room}
+
+    async def get_users_who_share_room_with_user(self, user_id: str) -> Set[str]:
         """Returns the set of users who share a room with `user_id`"""
-        room_ids = await self.get_rooms_for_user(
-            user_id, on_invalidate=cache_context.invalidate
-        )
+        room_ids = await self.get_rooms_for_user(user_id)

         user_who_share_room = set()
         for room_id in room_ids:
-            user_ids = await self.get_users_in_room(
-                room_id, on_invalidate=cache_context.invalidate
-            )
+            user_ids = await self.get_users_in_room(room_id)
             user_who_share_room.update(user_ids)

         return user_who_share_room

From 908aeac44ab3acc1c42fd6c33ef716ddf51142a7 Mon Sep 17 00:00:00 2001
From: Patrick Cloke
Date: Mon, 25 Jul 2022 08:34:06 -0400
Subject: [PATCH 178/178] Additional fixes for opentracing type hints. (#13362)

---
 changelog.d/13362.misc         | 1 +
 synapse/logging/opentracing.py | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/13362.misc

diff --git a/changelog.d/13362.misc b/changelog.d/13362.misc
new file mode 100644
index 0000000000..c80578ce95
--- /dev/null
+++ b/changelog.d/13362.misc
@@ -0,0 +1 @@
+Add missing type hints to open tracing module.
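
The diff that follows stringifies values before tagging them onto the active
span. As a minimal sketch of the idea (assuming `set_tag` is annotated to
accept only scalar-ish values such as `str`, `bool` and numbers, which is why
arbitrary argument objects are converted first):

```python
from synapse.logging.opentracing import set_tag


def record_call(args: tuple, kwargs: dict) -> None:
    # Stringify up front: arbitrary Python objects (tuples, dicts, model
    # instances) then satisfy the narrower value type expected by set_tag
    # and serialise predictably into the span.
    set_tag("args", str(args))
    set_tag("kwargs", str(kwargs))
```
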
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index ad5cbf46a4..c1aa205eed 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -910,8 +910,8 @@ def tag_args(func: Callable[P, R]) -> Callable[P, R]:
     def _tag_args_inner(*args: P.args, **kwargs: P.kwargs) -> R:
         argspec = inspect.getfullargspec(func)
         for i, arg in enumerate(argspec.args[1:]):
-            set_tag("ARG_" + arg, args[i])  # type: ignore[index]
-        set_tag("args", args[len(argspec.args) :])  # type: ignore[index]
+            set_tag("ARG_" + arg, str(args[i]))  # type: ignore[index]
+        set_tag("args", str(args[len(argspec.args) :]))  # type: ignore[index]
         set_tag("kwargs", str(kwargs))
         return func(*args, **kwargs)
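To illustrate the effect of the change above, here is a hypothetical use of the
`tag_args` decorator: the traced method's arguments are now recorded as string
tags, so non-string arguments no longer trip the type checker. The class, method
and argument values are invented for the example, and an active span is assumed
(e.g. via `start_active_span`).

```python
from synapse.logging.opentracing import start_active_span, tag_args


class ScoreComputer:
    @tag_args
    def compute_score(self, user_id: str, weights: dict) -> int:
        # Hypothetical traced method: with tracing active, the decorator
        # records the positional arguments (stringified) as ARG_* tags,
        # plus "args" and "kwargs" tags, on the current span.
        return len(user_id) + len(weights)


with start_active_span("compute_score_example"):
    score = ScoreComputer().compute_score("@alice:example.com", {"recency": 2})
    print(score)
```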