
Merge branch 'develop' of github.com:matrix-org/synapse into anoa/module_api_full_presence_fix_wip

This commit is contained in:
Andrew Morgan
2021-05-11 18:28:06 +01:00
370 changed files with 1651 additions and 415 deletions

View File

@@ -1,36 +0,0 @@
#!/usr/bin/env python
# Copyright 2019 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from synapse.storage.engines import create_engine
logger = logging.getLogger("create_postgres_db")
if __name__ == "__main__":
# Create a PostgresEngine.
db_engine = create_engine({"name": "psycopg2", "args": {}})
# Connect to postgres to create the base database.
# We use "postgres" as a database because it's bound to exist and the "synapse" one
# doesn't exist yet.
db_conn = db_engine.module.connect(
user="postgres", host="postgres", password="postgres", dbname="postgres"
)
db_conn.autocommit = True
cur = db_conn.cursor()
cur.execute("CREATE DATABASE synapse;")
cur.close()
db_conn.close()

View File

@@ -0,0 +1,31 @@
#!/usr/bin/env python
# Copyright 2019 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys

import psycopg2

# a very simple replacement for `psql`, to make up for the lack of the postgres client
# libraries in the synapse docker image.

# We use "postgres" as a database because it's bound to exist and the "synapse" one
# doesn't exist yet.
db_conn = psycopg2.connect(
    user="postgres", host="postgres", password="postgres", dbname="postgres"
)
db_conn.autocommit = True
cur = db_conn.cursor()

for c in sys.argv[1:]:
    cur.execute(c)

View File

@@ -1,10 +1,10 @@
#!/usr/bin/env bash
#
# Test script for 'synapse_port_db', which creates a virtualenv, installs Synapse along
# with additional dependencies needed for the test (such as coverage or the PostgreSQL
# driver), update the schema of the test SQLite database and run background updates on it,
# create an empty test database in PostgreSQL, then run the 'synapse_port_db' script to
# test porting the SQLite database to the PostgreSQL database (with coverage).
# Test script for 'synapse_port_db'.
# - sets up synapse and deps
# - runs the port script on a prepopulated test sqlite db
# - also runs it against a new sqlite db
set -xe
cd `dirname $0`/../..
@@ -22,15 +22,32 @@ echo "--- Generate the signing key"
# Generate the server's signing key.
python -m synapse.app.homeserver --generate-keys -c .buildkite/sqlite-config.yaml
echo "--- Prepare the databases"
echo "--- Prepare test database"
# Make sure the SQLite3 database is using the latest schema and has no pending background update.
scripts-dev/update_database --database-config .buildkite/sqlite-config.yaml
# Create the PostgreSQL database.
./.buildkite/scripts/create_postgres_db.py
./.buildkite/scripts/postgres_exec.py "CREATE DATABASE synapse"
echo "+++ Run synapse_port_db"
# Run the script
echo "+++ Run synapse_port_db against test database"
coverage run scripts/synapse_port_db --sqlite-database .buildkite/test_db.db --postgres-config .buildkite/postgres-config.yaml
#####
# Now do the same again, on an empty database.
echo "--- Prepare empty SQLite database"
# we do this by deleting the sqlite db, and then doing the same again.
rm .buildkite/test_db.db
scripts-dev/update_database --database-config .buildkite/sqlite-config.yaml
# re-create the PostgreSQL database.
./.buildkite/scripts/postgres_exec.py \
"DROP DATABASE synapse" \
"CREATE DATABASE synapse"
echo "+++ Run synapse_port_db against empty database"
coverage run scripts/synapse_port_db --sqlite-database .buildkite/test_db.db --postgres-config .buildkite/postgres-config.yaml

View File

@@ -273,7 +273,7 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Patch Buildkite-specific test scripts
run: |
sed -i -e 's/host="postgres"/host="localhost"/' .buildkite/scripts/create_postgres_db.py
sed -i -e 's/host="postgres"/host="localhost"/' .buildkite/scripts/postgres_exec.py
sed -i -e 's/host: postgres/host: localhost/' .buildkite/postgres-config.yaml
sed -i -e 's|/src/||' .buildkite/{sqlite,postgres}-config.yaml
sed -i -e 's/\$TOP/\$GITHUB_WORKSPACE/' .coveragerc

View File

@@ -1,3 +1,37 @@
Synapse 1.33.2 (2021-05-11)
===========================
Due to the security issue highlighted below, server administrators are encouraged to update Synapse. We are not aware of these vulnerabilities being exploited in the wild.
Security advisory
-----------------
This release fixes a denial of service attack ([CVE-2021-29471](https://github.com/matrix-org/synapse/security/advisories/GHSA-x345-32rc-8h85)) against Synapse's push rules implementation. Server admins are encouraged to upgrade.
Internal Changes
----------------
- Unpin attrs dependency. ([\#9946](https://github.com/matrix-org/synapse/issues/9946))
Synapse 1.33.1 (2021-05-06)
===========================
Bugfixes
--------
- Fix bug where `/sync` would break if using the latest version of `attrs` dependency, by pinning to a previous version. ([\#9937](https://github.com/matrix-org/synapse/issues/9937))
Synapse 1.33.0 (2021-05-05)
===========================
Features
--------
- Build Debian packages for Ubuntu 21.04 (Hirsute Hippo). ([\#9909](https://github.com/matrix-org/synapse/issues/9909))
Synapse 1.33.0rc2 (2021-04-29)
==============================

View File

@@ -85,6 +85,35 @@ for example:
wget https://packages.matrix.org/debian/pool/main/m/matrix-synapse-py3/matrix-synapse-py3_1.3.0+stretch1_amd64.deb
dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
Upgrading to v1.34.0
====================
`room_invite_state_types` configuration setting
-----------------------------------------------
The ``room_invite_state_types`` configuration setting has been deprecated and
replaced with ``room_prejoin_state``. See the `sample configuration file <https://github.com/matrix-org/synapse/blob/v1.34.0/docs/sample_config.yaml#L1515>`_.
If you have set ``room_invite_state_types`` to the default value you should simply
remove it from your configuration file. The default value used to be:
.. code:: yaml

   room_invite_state_types:
      - "m.room.join_rules"
      - "m.room.canonical_alias"
      - "m.room.avatar"
      - "m.room.encryption"
      - "m.room.name"
If you have customised this value by adding additional state types, you should
remove ``room_invite_state_types`` and configure ``additional_event_types`` with
your customisations.
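For instance, if you had previously appended a custom event type to the default
list, a minimal sketch of the equivalent new configuration (the
``org.example.custom.event`` type below is purely illustrative) would be:

.. code:: yaml

   room_prejoin_state:
      additional_event_types:
         - "org.example.custom.event"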
If you have customised this value by removing state types, you should rename
``room_invite_state_types`` to ``additional_event_types``, and set
``disable_default_event_types`` to ``true``.
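As a further illustrative sketch, assuming you had previously trimmed the list
down to just the join rules and room name events, the new configuration would
look like:

.. code:: yaml

   room_prejoin_state:
      disable_default_event_types: true
      additional_event_types:
         - "m.room.join_rules"
         - "m.room.name"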
Upgrading to v1.33.0
====================

1
changelog.d/9881.feature Normal file
View File

@@ -0,0 +1 @@
Add experimental option to track memory usage of the caches.

1
changelog.d/9882.misc Normal file
View File

@@ -0,0 +1 @@
Export jemalloc stats to Prometheus if it is being used.

1
changelog.d/9902.feature Normal file
View File

@@ -0,0 +1 @@
Add limits to how often Synapse will GC, ensuring that large servers do not end up GC thrashing if `gc_thresholds` has not been correctly set.

1
changelog.d/9904.misc Normal file
View File

@@ -0,0 +1 @@
Time response time for external cache requests.

1
changelog.d/9905.feature Normal file
View File

@@ -0,0 +1 @@
Improve performance of sending events for worker-based deployments using Redis.

1
changelog.d/9910.bugfix Normal file
View File

@@ -0,0 +1 @@
Fix bug where user directory could get out of sync if room visibility and membership changed in quick succession.

1
changelog.d/9910.feature Normal file
View File

@@ -0,0 +1 @@
Improve performance after joining a large room when presence is enabled.

1
changelog.d/9911.doc Normal file
View File

@@ -0,0 +1 @@
Add `port` argument to the Postgres database sample config section.

1
changelog.d/9913.docker Normal file
View File

@@ -0,0 +1 @@
Added startup_delay to docker healthcheck to reduce waiting time for coming online, updated readme for extra options, contributed by @Maquis196.

1
changelog.d/9915.feature Normal file
View File

@@ -0,0 +1 @@
Support stable identifiers for [MSC1772](https://github.com/matrix-org/matrix-doc/pull/1772) Spaces. `m.space.child` events will now be taken into account when populating the experimental spaces summary response. Please see `UPGRADE.rst` if you have customised `room_invite_state_types` in your configuration.

1
changelog.d/9916.feature Normal file
View File

@@ -0,0 +1 @@
Improve performance after joining a large room when presence is enabled.

1
changelog.d/9928.bugfix Normal file
View File

@@ -0,0 +1 @@
Include the `origin_server_ts` property in the experimental [MSC2946](https://github.com/matrix-org/matrix-doc/pull/2946) support to allow clients to properly sort rooms.

1
changelog.d/9930.bugfix Normal file
View File

@@ -0,0 +1 @@
Fix bugs introduced in v1.23.0 which made the PostgreSQL port script fail when run with a newly-created SQLite database.

1
changelog.d/9931.misc Normal file
View File

@@ -0,0 +1 @@
Minor fixes to the `make_full_schema.sh` script.

1
changelog.d/9932.misc Normal file
View File

@@ -0,0 +1 @@
Move database schema files into a common directory.

1
changelog.d/9935.feature Normal file
View File

@@ -0,0 +1 @@
Improve performance of backfilling in large rooms.

1
changelog.d/9945.feature Normal file
View File

@@ -0,0 +1 @@
Add a config option to allow you to prevent device display names from being shared over federation. Contributed by @aaronraimist.

1
changelog.d/9947.feature Normal file
View File

@@ -0,0 +1 @@
Update support for [MSC2946](https://github.com/matrix-org/matrix-doc/pull/2946): Spaces Summary.

1
changelog.d/9950.feature Normal file
View File

@@ -0,0 +1 @@
Improve performance of sending events for worker-based deployments using Redis.

1
changelog.d/9954.feature Normal file
View File

@@ -0,0 +1 @@
Update support for [MSC2946](https://github.com/matrix-org/matrix-doc/pull/2946): Spaces Summary.

1
changelog.d/9959.misc Normal file
View File

@@ -0,0 +1 @@
Add debug logging for lost/delayed to-device messages.

1
changelog.d/9961.bugfix Normal file
View File

@@ -0,0 +1 @@
Fix a bug introduced in Synapse 1.29.0 which caused `m.room_key_request` to-device messages sent from one user to another to be dropped.

1
changelog.d/9965.bugfix Normal file
View File

@@ -0,0 +1 @@
Fix a bug introduced in Synapse 1.29.0 which caused `m.room_key_request` to-device messages sent from one user to another to be dropped.

1
changelog.d/9966.feature Normal file
View File

@@ -0,0 +1 @@
Support stable identifiers for [MSC1772](https://github.com/matrix-org/matrix-doc/pull/1772) Spaces. `m.space.child` events will now be taken into account when populating the experimental spaces summary response. Please see `UPGRADE.rst` if you have customised `room_invite_state_types` in your configuration.

18
debian/changelog vendored
View File

@@ -1,3 +1,21 @@
matrix-synapse-py3 (1.33.2) stable; urgency=medium
* New synapse release 1.33.2.
-- Synapse Packaging team <packages@matrix.org> Tue, 11 May 2021 11:17:59 +0100
matrix-synapse-py3 (1.33.1) stable; urgency=medium
* New synapse release 1.33.1.
-- Synapse Packaging team <packages@matrix.org> Thu, 06 May 2021 14:06:33 +0100
matrix-synapse-py3 (1.33.0) stable; urgency=medium
* New synapse release 1.33.0.
-- Synapse Packaging team <packages@matrix.org> Wed, 05 May 2021 14:15:27 +0100
matrix-synapse-py3 (1.32.2) stable; urgency=medium
* New synapse release 1.32.2.

View File

@@ -88,5 +88,5 @@ EXPOSE 8008/tcp 8009/tcp 8448/tcp
ENTRYPOINT ["/start.py"]
HEALTHCHECK --interval=1m --timeout=5s \
HEALTHCHECK --start-period=5s --interval=15s --timeout=5s \
CMD curl -fSs http://localhost:8008/health || exit 1

View File

@@ -191,6 +191,16 @@ whilst running the above `docker run` commands.
```
--no-healthcheck
```
## Disabling the healthcheck in docker-compose file
If you wish to disable the healthcheck via docker-compose, append the following to your service configuration.
```
healthcheck:
disable: true
```
## Setting custom healthcheck on docker run
If you wish to point the healthcheck at a different port with docker command, add the following
@@ -202,14 +212,15 @@ If you wish to point the healthcheck at a different port with docker command, ad
## Setting the healthcheck in docker-compose file
You can add the following to set a custom healthcheck in a docker compose file.
You will need version >2.1 for this to work.
You will need docker-compose version >2.1 for this to work.
```
healthcheck:
test: ["CMD", "curl", "-fSs", "http://localhost:8008/health"]
interval: 1m
timeout: 10s
interval: 15s
timeout: 5s
retries: 3
start_period: 5s
```
## Using jemalloc

View File

@@ -152,6 +152,16 @@ presence:
#
#gc_thresholds: [700, 10, 10]
# The minimum time in seconds between each GC for a generation, regardless of
# the GC thresholds. This ensures that we don't do GC too frequently.
#
# A value of `[1s, 10s, 30s]` indicates that a second must pass between consecutive
# generation 0 GCs, etc.
#
# Defaults to `[1s, 10s, 30s]`.
#
#gc_min_interval: [0.5s, 30s, 1m]
# Set the limit on the returned events in the timeline in the get
# and sync operations. The default value is 100. -1 means no upper limit.
#
@@ -731,6 +741,12 @@ acme:
#
#allow_profile_lookup_over_federation: false
# Uncomment to disable device display name lookup over federation. By default, the
# Federation API allows other homeservers to obtain device display names of any user
# on this homeserver. Defaults to 'true'.
#
#allow_device_name_lookup_over_federation: false
## Caching ##
@@ -810,6 +826,7 @@ caches:
# password: secretpassword
# database: synapse
# host: localhost
# port: 5432
# cp_min: 5
# cp_max: 10
#
@@ -1504,6 +1521,7 @@ room_prejoin_state:
# - m.room.avatar
# - m.room.encryption
# - m.room.name
# - m.room.create
#
# Uncomment the following to disable these defaults (so that only the event
# types listed in 'additional_event_types' are shared). Defaults to 'false'.

View File

@@ -171,3 +171,6 @@ ignore_missing_imports = True
[mypy-txacme.*]
ignore_missing_imports = True
[mypy-pympler.*]
ignore_missing_imports = True

View File

@@ -21,9 +21,10 @@ DISTS = (
"debian:buster",
"debian:bullseye",
"debian:sid",
"ubuntu:bionic",
"ubuntu:focal",
"ubuntu:groovy",
"ubuntu:bionic", # 18.04 LTS (our EOL forced by Py36 on 2021-12-23)
"ubuntu:focal", # 20.04 LTS (our EOL forced by Py38 on 2024-10-14)
"ubuntu:groovy", # 20.10 (EOL 2021-07-07)
"ubuntu:hirsute", # 21.04 (EOL 2022-01-05)
)
DESC = '''\

View File

@@ -6,7 +6,7 @@
# It does so by having Synapse generate an up-to-date SQLite DB, then running
# synapse_port_db to convert it to Postgres. It then dumps the contents of both.
POSTGRES_HOST="localhost"
export PGHOST="localhost"
POSTGRES_DB_NAME="synapse_full_schema.$$"
SQLITE_FULL_SCHEMA_OUTPUT_FILE="full.sql.sqlite"
@@ -32,7 +32,7 @@ usage() {
while getopts "p:co:h" opt; do
case $opt in
p)
POSTGRES_USERNAME=$OPTARG
export PGUSER=$OPTARG
;;
c)
# Print all commands that are being executed
@@ -69,7 +69,7 @@ if [ ${#unsatisfied_requirements} -ne 0 ]; then
exit 1
fi
if [ -z "$POSTGRES_USERNAME" ]; then
if [ -z "$PGUSER" ]; then
echo "No postgres username supplied"
usage
exit 1
@@ -84,8 +84,9 @@ fi
# Create the output directory if it doesn't exist
mkdir -p "$OUTPUT_DIR"
read -rsp "Postgres password for '$POSTGRES_USERNAME': " POSTGRES_PASSWORD
read -rsp "Postgres password for '$PGUSER': " PGPASSWORD
echo ""
export PGPASSWORD
# Exit immediately if a command fails
set -e
@@ -131,9 +132,9 @@ report_stats: false
database:
name: "psycopg2"
args:
user: "$POSTGRES_USERNAME"
host: "$POSTGRES_HOST"
password: "$POSTGRES_PASSWORD"
user: "$PGUSER"
host: "$PGHOST"
password: "$PGPASSWORD"
database: "$POSTGRES_DB_NAME"
# Suppress the key server warning.
@@ -150,7 +151,7 @@ scripts-dev/update_database --database-config "$SQLITE_CONFIG"
# Create the PostgreSQL database.
echo "Creating postgres database..."
createdb $POSTGRES_DB_NAME
createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_DB_NAME"
echo "Copying data from SQLite3 to Postgres with synapse_port_db..."
if [ -z "$COVERAGE" ]; then
@@ -181,7 +182,7 @@ DROP TABLE user_directory_search_docsize;
DROP TABLE user_directory_search_stat;
"
sqlite3 "$SQLITE_DB" <<< "$SQL"
psql $POSTGRES_DB_NAME -U "$POSTGRES_USERNAME" -w <<< "$SQL"
psql "$POSTGRES_DB_NAME" -w <<< "$SQL"
echo "Dumping SQLite3 schema to '$OUTPUT_DIR/$SQLITE_FULL_SCHEMA_OUTPUT_FILE'..."
sqlite3 "$SQLITE_DB" ".dump" > "$OUTPUT_DIR/$SQLITE_FULL_SCHEMA_OUTPUT_FILE"

View File

@@ -913,10 +913,11 @@ class Porter(object):
(curr_forward_id + 1,),
)
txn.execute(
"ALTER SEQUENCE events_backfill_stream_seq RESTART WITH %s",
(curr_backward_id + 1,),
)
if curr_backward_id:
txn.execute(
"ALTER SEQUENCE events_backfill_stream_seq RESTART WITH %s",
(curr_backward_id + 1,),
)
await self.postgres_store.db_pool.runInteraction(
"_setup_events_stream_seqs", _setup_events_stream_seqs_set_pos,
@@ -954,10 +955,11 @@ class Porter(object):
(curr_chain_id,),
)
await self.postgres_store.db_pool.runInteraction(
"_setup_event_auth_chain_id", r,
)
if curr_chain_id is not None:
await self.postgres_store.db_pool.runInteraction(
"_setup_event_auth_chain_id",
r,
)
##############################################

View File

@@ -47,7 +47,7 @@ try:
except ImportError:
pass
__version__ = "1.33.0rc2"
__version__ = "1.33.2"
if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)):
# We import here so that we don't have to install a bunch of deps when

View File

@@ -110,13 +110,18 @@ class EventTypes:
Dummy = "org.matrix.dummy_event"
SpaceChild = "m.space.child"
SpaceParent = "m.space.parent"
MSC1772_SPACE_CHILD = "org.matrix.msc1772.space.child"
MSC1772_SPACE_PARENT = "org.matrix.msc1772.space.parent"
class ToDeviceEventTypes:
RoomKeyRequest = "m.room_key_request"
class EduTypes:
Presence = "m.presence"
RoomKeyRequest = "m.room_key_request"
class RejectedReason:
@@ -174,6 +179,7 @@ class EventContentFields:
SELF_DESTRUCT_AFTER = "org.matrix.self_destruct_after"
# cf https://github.com/matrix-org/matrix-doc/pull/1772
ROOM_TYPE = "type"
MSC1772_ROOM_TYPE = "org.matrix.msc1772.type"

View File

@@ -37,6 +37,7 @@ from synapse.config.homeserver import HomeServerConfig
from synapse.crypto import context_factory
from synapse.logging.context import PreserveLoggingContext
from synapse.metrics.background_process_metrics import wrap_as_background_process
from synapse.metrics.jemalloc import setup_jemalloc_stats
from synapse.util.async_helpers import Linearizer
from synapse.util.daemonize import daemonize_process
from synapse.util.rlimit import change_resource_limit
@@ -115,6 +116,7 @@ def start_reactor(
def run():
logger.info("Running")
setup_jemalloc_stats()
change_resource_limit(soft_file_limit)
if gc_thresholds:
gc.set_threshold(*gc_thresholds)

View File

@@ -454,6 +454,10 @@ def start(config_options):
config.server.update_user_directory = False
synapse.events.USE_FROZEN_DICTS = config.use_frozen_dicts
synapse.util.caches.TRACK_MEMORY_USAGE = config.caches.track_memory_usage
if config.server.gc_seconds:
synapse.metrics.MIN_TIME_BETWEEN_GCS = config.server.gc_seconds
hs = GenericWorkerServer(
config.server_name,

View File

@@ -341,6 +341,10 @@ def setup(config_options):
sys.exit(0)
events.USE_FROZEN_DICTS = config.use_frozen_dicts
synapse.util.caches.TRACK_MEMORY_USAGE = config.caches.track_memory_usage
if config.server.gc_seconds:
synapse.metrics.MIN_TIME_BETWEEN_GCS = config.server.gc_seconds
hs = SynapseHomeServer(
config.server_name,

View File

@@ -88,10 +88,6 @@ class ApiConfig(Config):
if not room_prejoin_state_config.get("disable_default_event_types"):
yield from _DEFAULT_PREJOIN_STATE_TYPES
if self.spaces_enabled:
# MSC1772 suggests adding m.room.create to the prejoin state
yield EventTypes.Create
yield from room_prejoin_state_config.get("additional_event_types", [])
@@ -109,6 +105,8 @@ _DEFAULT_PREJOIN_STATE_TYPES = [
EventTypes.RoomAvatar,
EventTypes.RoomEncryption,
EventTypes.Name,
# Per MSC1772.
EventTypes.Create,
]

View File

@@ -17,6 +17,8 @@ import re
import threading
from typing import Callable, Dict
from synapse.python_dependencies import DependencyException, check_requirements
from ._base import Config, ConfigError
# The prefix for all cache factor-related environment variables
@@ -189,6 +191,15 @@ class CacheConfig(Config):
)
self.cache_factors[cache] = factor
self.track_memory_usage = cache_config.get("track_memory_usage", False)
if self.track_memory_usage:
try:
check_requirements("cache_memory")
except DependencyException as e:
raise ConfigError(
e.message # noqa: B306, DependencyException.message is a property
)
# Resize all caches (if necessary) with the new factors we've loaded
self.resize_all_caches()

View File

@@ -58,6 +58,7 @@ DEFAULT_CONFIG = """\
# password: secretpassword
# database: synapse
# host: localhost
# port: 5432
# cp_min: 5
# cp_max: 10
#

View File

@@ -44,6 +44,10 @@ class FederationConfig(Config):
"allow_profile_lookup_over_federation", True
)
self.allow_device_name_lookup_over_federation = config.get(
"allow_device_name_lookup_over_federation", True
)
def generate_config_section(self, config_dir_path, server_name, **kwargs):
return """\
## Federation ##
@@ -75,6 +79,12 @@ class FederationConfig(Config):
# on this homeserver. Defaults to 'true'.
#
#allow_profile_lookup_over_federation: false
# Uncomment to disable device display name lookup over federation. By default, the
# Federation API allows other homeservers to obtain device display names of any user
# on this homeserver. Defaults to 'true'.
#
#allow_device_name_lookup_over_federation: false
"""

View File

@@ -19,7 +19,7 @@ import logging
import os.path
import re
from textwrap import indent
from typing import Any, Dict, Iterable, List, Optional, Set
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
import attr
import yaml
@@ -572,6 +572,7 @@ class ServerConfig(Config):
_warn_if_webclient_configured(self.listeners)
self.gc_thresholds = read_gc_thresholds(config.get("gc_thresholds", None))
self.gc_seconds = self.read_gc_intervals(config.get("gc_min_interval", None))
@attr.s
class LimitRemoteRoomsConfig:
@@ -917,6 +918,16 @@ class ServerConfig(Config):
#
#gc_thresholds: [700, 10, 10]
# The minimum time in seconds between each GC for a generation, regardless of
# the GC thresholds. This ensures that we don't do GC too frequently.
#
# A value of `[1s, 10s, 30s]` indicates that a second must pass between consecutive
# generation 0 GCs, etc.
#
# Defaults to `[1s, 10s, 30s]`.
#
#gc_min_interval: [0.5s, 30s, 1m]
# Set the limit on the returned events in the timeline in the get
# and sync operations. The default value is 100. -1 means no upper limit.
#
@@ -1305,6 +1316,24 @@ class ServerConfig(Config):
help="Turn on the twisted telnet manhole service on the given port.",
)
def read_gc_intervals(self, durations) -> Optional[Tuple[float, float, float]]:
"""Reads the three durations for the GC min interval option, returning seconds."""
if durations is None:
return None
try:
if len(durations) != 3:
raise ValueError()
return (
self.parse_duration(durations[0]) / 1000,
self.parse_duration(durations[1]) / 1000,
self.parse_duration(durations[2]) / 1000,
)
except Exception:
raise ConfigError(
"Value of `gc_min_interval` must be a list of three durations if set"
)
def is_threepid_reserved(reserved_threepids, threepid):
"""Check the threepid against the reserved threepid config

View File

@@ -17,7 +17,7 @@ import os
import warnings
from datetime import datetime
from hashlib import sha256
from typing import List, Optional
from typing import List, Optional, Pattern
from unpaddedbase64 import encode_base64
@@ -124,7 +124,7 @@ class TlsConfig(Config):
fed_whitelist_entries = []
# Support globs (*) in whitelist values
self.federation_certificate_verification_whitelist = [] # type: List[str]
self.federation_certificate_verification_whitelist = [] # type: List[Pattern]
for entry in fed_whitelist_entries:
try:
entry_regex = glob_to_regex(entry.encode("ascii").decode("ascii"))

View File

@@ -44,7 +44,6 @@ from synapse.api.errors import (
SynapseError,
UnsupportedRoomVersionError,
)
from synapse.api.ratelimiting import Ratelimiter
from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
from synapse.events import EventBase
from synapse.federation.federation_base import FederationBase, event_from_pdu_json
@@ -865,14 +864,6 @@ class FederationHandlerRegistry:
# EDU received.
self._edu_type_to_instance = {} # type: Dict[str, List[str]]
# A rate limiter for incoming room key requests per origin.
self._room_key_request_rate_limiter = Ratelimiter(
store=hs.get_datastore(),
clock=self.clock,
rate_hz=self.config.rc_key_requests.per_second,
burst_count=self.config.rc_key_requests.burst_count,
)
def register_edu_handler(
self, edu_type: str, handler: Callable[[str, JsonDict], Awaitable[None]]
) -> None:
@@ -926,16 +917,6 @@ class FederationHandlerRegistry:
if not self.config.use_presence and edu_type == EduTypes.Presence:
return
# If the incoming room key requests from a particular origin are over
# the limit, drop them.
if (
edu_type == EduTypes.RoomKeyRequest
and not await self._room_key_request_rate_limiter.can_do_action(
None, origin
)
):
return
# Check if we have a handler on this instance
handler = self.edu_handlers.get(edu_type)
if handler:

View File

@@ -28,6 +28,7 @@ from synapse.api.presence import UserPresenceState
from synapse.events import EventBase
from synapse.federation.units import Edu
from synapse.handlers.presence import format_user_presence_state
from synapse.logging import issue9533_logger
from synapse.logging.opentracing import SynapseTags, set_tag
from synapse.metrics import sent_transactions_counter
from synapse.metrics.background_process_metrics import run_as_background_process
@@ -574,6 +575,14 @@ class PerDestinationQueue:
for content in contents
]
if edus:
issue9533_logger.debug(
"Sending %i to-device messages to %s, up to stream id %i",
len(edus),
self._destination,
stream_id,
)
return (edus, stream_id)
def _start_catching_up(self) -> None:

View File

@@ -995,6 +995,7 @@ class TransportLayerClient:
returned per space
exclude_rooms: a list of any rooms we can skip
"""
# TODO When switching to the stable endpoint, use GET instead of POST.
path = _create_path(
FEDERATION_UNSTABLE_PREFIX, "/org.matrix.msc2946/spaces/%s", room_id
)

View File

@@ -1376,6 +1376,32 @@ class FederationSpaceSummaryServlet(BaseFederationServlet):
PREFIX = FEDERATION_UNSTABLE_PREFIX + "/org.matrix.msc2946"
PATH = "/spaces/(?P<room_id>[^/]*)"
async def on_GET(
self,
origin: str,
content: JsonDict,
query: Mapping[bytes, Sequence[bytes]],
room_id: str,
) -> Tuple[int, JsonDict]:
suggested_only = parse_boolean_from_args(query, "suggested_only", default=False)
max_rooms_per_space = parse_integer_from_args(query, "max_rooms_per_space")
exclude_rooms = []
if b"exclude_rooms" in query:
try:
exclude_rooms = [
room_id.decode("ascii") for room_id in query[b"exclude_rooms"]
]
except Exception:
raise SynapseError(
400, "Bad query parameter for exclude_rooms", Codes.INVALID_PARAM
)
return 200, await self.handler.federation_space_summary(
room_id, suggested_only, max_rooms_per_space, exclude_rooms
)
# TODO When switching to the stable endpoint, remove the POST handler.
async def on_POST(
self,
origin: str,

View File

@@ -15,7 +15,7 @@
import logging
from typing import TYPE_CHECKING, Any, Dict
from synapse.api.constants import EduTypes
from synapse.api.constants import ToDeviceEventTypes
from synapse.api.errors import SynapseError
from synapse.api.ratelimiting import Ratelimiter
from synapse.logging.context import run_in_background
@@ -79,6 +79,8 @@ class DeviceMessageHandler:
ReplicationUserDevicesResyncRestServlet.make_client(hs)
)
# a rate limiter for room key requests. The keys are
# (sending_user_id, sending_device_id).
self._ratelimiter = Ratelimiter(
store=self.store,
clock=hs.get_clock(),
@@ -100,12 +102,25 @@ class DeviceMessageHandler:
for user_id, by_device in content["messages"].items():
# we use UserID.from_string to catch invalid user ids
if not self.is_mine(UserID.from_string(user_id)):
logger.warning("Request for keys for non-local user %s", user_id)
logger.warning("To-device message to non-local user %s", user_id)
raise SynapseError(400, "Not a user here")
if not by_device:
continue
# Ratelimit key requests by the sending user.
if message_type == ToDeviceEventTypes.RoomKeyRequest:
allowed, _ = await self._ratelimiter.can_do_action(
None, (sender_user_id, None)
)
if not allowed:
logger.info(
"Dropping room_key_request from %s to %s due to rate limit",
sender_user_id,
user_id,
)
continue
messages_by_device = {
device_id: {
"content": message_content,
@@ -192,13 +207,19 @@ class DeviceMessageHandler:
for user_id, by_device in messages.items():
# Ratelimit local cross-user key requests by the sending device.
if (
message_type == EduTypes.RoomKeyRequest
message_type == ToDeviceEventTypes.RoomKeyRequest
and user_id != sender_user_id
and await self._ratelimiter.can_do_action(
):
allowed, _ = await self._ratelimiter.can_do_action(
requester, (sender_user_id, requester.device_id)
)
):
continue
if not allowed:
logger.info(
"Dropping room_key_request from %s to %s due to rate limit",
sender_user_id,
user_id,
)
continue
# we use UserID.from_string to catch invalid user ids
if self.is_mine(UserID.from_string(user_id)):

View File

@@ -78,7 +78,7 @@ class DirectoryHandler(BaseHandler):
# TODO(erikj): Add transactions.
# TODO(erikj): Check if there is a current association.
if not servers:
users = await self.state.get_current_users_in_room(room_id)
users = await self.store.get_users_in_room(room_id)
servers = {get_domain_from_id(u) for u in users}
if not servers:
@@ -270,7 +270,7 @@ class DirectoryHandler(BaseHandler):
Codes.NOT_FOUND,
)
users = await self.state.get_current_users_in_room(room_id)
users = await self.store.get_users_in_room(room_id)
extra_servers = {get_domain_from_id(u) for u in users}
servers = set(extra_servers) | set(servers)

View File

@@ -103,7 +103,7 @@ class EventStreamHandler(BaseHandler):
# Send down presence.
if event.state_key == auth_user_id:
# Send down presence for everyone in the room.
users = await self.state.get_current_users_in_room(
users = await self.store.get_users_in_room(
event.room_id
) # type: Iterable[str]
else:

View File

@@ -552,8 +552,12 @@ class FederationHandler(BaseHandler):
destination: str,
room_id: str,
event_id: str,
) -> Tuple[List[EventBase], List[EventBase]]:
"""Requests all of the room state at a given event from a remote homeserver.
) -> List[EventBase]:
"""Requests all of the room state at a given event from a remote
homeserver.
Will also fetch any missing events reported in the `auth_chain_ids`
section of `/state_ids`.
Args:
destination: The remote homeserver to query for the state.
@@ -561,8 +565,7 @@ class FederationHandler(BaseHandler):
event_id: The id of the event we want the state at.
Returns:
A list of events in the state, not including the event itself, and
a list of events in the auth chain for the given event.
A list of events in the state, not including the event itself.
"""
(
state_event_ids,
@@ -571,68 +574,53 @@ class FederationHandler(BaseHandler):
destination, room_id, event_id=event_id
)
desired_events = set(state_event_ids + auth_event_ids)
event_map = await self._get_events_from_store_or_dest(
destination, room_id, desired_events
)
failed_to_fetch = desired_events - event_map.keys()
if failed_to_fetch:
logger.warning(
"Failed to fetch missing state/auth events for %s %s",
event_id,
failed_to_fetch,
)
remote_state = [
event_map[e_id] for e_id in state_event_ids if e_id in event_map
]
auth_chain = [event_map[e_id] for e_id in auth_event_ids if e_id in event_map]
auth_chain.sort(key=lambda e: e.depth)
return remote_state, auth_chain
async def _get_events_from_store_or_dest(
self, destination: str, room_id: str, event_ids: Iterable[str]
) -> Dict[str, EventBase]:
"""Fetch events from a remote destination, checking if we already have them.
Persists any events we don't already have as outliers.
If we fail to fetch any of the events, a warning will be logged, and the event
will be omitted from the result. Likewise, any events which turn out not to
be in the given room.
This function *does not* automatically get missing auth events of the
newly fetched events. Callers must include the full auth chain
of the missing events in the `event_ids` argument, to ensure that any
missing auth events are correctly fetched.
Returns:
map from event_id to event
"""
fetched_events = await self.store.get_events(event_ids, allow_rejected=True)
missing_events = set(event_ids) - fetched_events.keys()
if missing_events:
logger.debug(
"Fetching unknown state/auth events %s for room %s",
missing_events,
room_id,
)
# Fetch the state events from the DB, and check we have the auth events.
event_map = await self.store.get_events(state_event_ids, allow_rejected=True)
auth_events_in_store = await self.store.have_seen_events(auth_event_ids)
# Check for missing events. We handle state and auth events separately,
# as we want to pull the state from the DB, but we don't for the auth
# events. (Note: we likely won't use the majority of the auth chain, and
# it can be *huge* for large rooms, so it's worth ensuring that we don't
# unnecessarily pull it from the DB).
missing_state_events = set(state_event_ids) - set(event_map)
missing_auth_events = set(auth_event_ids) - set(auth_events_in_store)
if missing_state_events or missing_auth_events:
await self._get_events_and_persist(
destination=destination, room_id=room_id, events=missing_events
destination=destination,
room_id=room_id,
events=missing_state_events | missing_auth_events,
)
# we need to make sure we re-load from the database to get the rejected
# state correct.
fetched_events.update(
(await self.store.get_events(missing_events, allow_rejected=True))
)
if missing_state_events:
new_events = await self.store.get_events(
missing_state_events, allow_rejected=True
)
event_map.update(new_events)
missing_state_events.difference_update(new_events)
if missing_state_events:
logger.warning(
"Failed to fetch missing state events for %s %s",
event_id,
missing_state_events,
)
if missing_auth_events:
auth_events_in_store = await self.store.have_seen_events(
missing_auth_events
)
missing_auth_events.difference_update(auth_events_in_store)
if missing_auth_events:
logger.warning(
"Failed to fetch missing auth events for %s %s",
event_id,
missing_auth_events,
)
remote_state = list(event_map.values())
# check for events which were in the wrong room.
#
@@ -640,8 +628,8 @@ class FederationHandler(BaseHandler):
# auth_events at an event in room A are actually events in room B
bad_events = [
(event_id, event.room_id)
for event_id, event in fetched_events.items()
(event.event_id, event.room_id)
for event in remote_state
if event.room_id != room_id
]
@@ -658,9 +646,10 @@ class FederationHandler(BaseHandler):
room_id,
)
del fetched_events[bad_event_id]
if bad_events:
remote_state = [e for e in remote_state if e.room_id == room_id]
return fetched_events
return remote_state
async def _get_state_after_missing_prev_event(
self,
@@ -963,27 +952,23 @@ class FederationHandler(BaseHandler):
# For each edge get the current state.
auth_events = {}
state_events = {}
events_to_state = {}
for e_id in edges:
state, auth = await self._get_state_for_room(
state = await self._get_state_for_room(
destination=dest,
room_id=room_id,
event_id=e_id,
)
auth_events.update({a.event_id: a for a in auth})
auth_events.update({s.event_id: s for s in state})
state_events.update({s.event_id: s for s in state})
events_to_state[e_id] = state
required_auth = {
a_id
for event in events
+ list(state_events.values())
+ list(auth_events.values())
for event in events + list(state_events.values())
for a_id in event.auth_event_ids()
}
auth_events = await self.store.get_events(required_auth, allow_rejected=True)
auth_events.update(
{e_id: event_map[e_id] for e_id in required_auth if e_id in event_map}
)
@@ -2446,7 +2431,9 @@ class FederationHandler(BaseHandler):
# If we are going to send this event over federation we precalculate
# the joined hosts.
if event.internal_metadata.get_send_on_behalf_of():
await self.event_creation_handler.cache_joined_hosts_for_event(event)
await self.event_creation_handler.cache_joined_hosts_for_event(
event, context
)
return context

View File

@@ -51,6 +51,7 @@ from synapse.storage.state import StateFilter
from synapse.types import Requester, RoomAlias, StreamToken, UserID, create_requester
from synapse.util import json_decoder, json_encoder
from synapse.util.async_helpers import Linearizer
from synapse.util.caches.expiringcache import ExpiringCache
from synapse.util.metrics import measure_func
from synapse.visibility import filter_events_for_client
@@ -258,7 +259,7 @@ class MessageHandler:
"Getting joined members after leaving is not implemented"
)
users_with_profile = await self.state.get_current_users_in_room(room_id)
users_with_profile = await self.store.get_users_in_room_with_profiles(room_id)
# If this is an AS, double check that they are allowed to see the members.
# This can either be because the AS user is in the room or because there
@@ -457,6 +458,19 @@ class EventCreationHandler:
self._external_cache = hs.get_external_cache()
# Stores the state groups we've recently added to the joined hosts
# external cache. Note that the timeout must be significantly less than
# the TTL on the external cache.
self._external_cache_joined_hosts_updates = (
None
) # type: Optional[ExpiringCache]
if self._external_cache.is_enabled():
self._external_cache_joined_hosts_updates = ExpiringCache(
"_external_cache_joined_hosts_updates",
self.clock,
expiry_ms=30 * 60 * 1000,
)
async def create_event(
self,
requester: Requester,
@@ -967,7 +981,7 @@ class EventCreationHandler:
await self.action_generator.handle_push_actions_for_event(event, context)
await self.cache_joined_hosts_for_event(event)
await self.cache_joined_hosts_for_event(event, context)
try:
# If we're a worker we need to hit out to the master.
@@ -1008,7 +1022,9 @@ class EventCreationHandler:
await self.store.remove_push_actions_from_staging(event.event_id)
raise
async def cache_joined_hosts_for_event(self, event: EventBase) -> None:
async def cache_joined_hosts_for_event(
self, event: EventBase, context: EventContext
) -> None:
"""Precalculate the joined hosts at the event, when using Redis, so that
external federation senders don't have to recalculate it themselves.
"""
@@ -1016,6 +1032,9 @@ class EventCreationHandler:
if not self._external_cache.is_enabled():
return
# If external cache is enabled we should always have this.
assert self._external_cache_joined_hosts_updates is not None
# We actually store two mappings, event ID -> prev state group,
# state group -> joined hosts, which is much more space efficient
# than event ID -> joined hosts.
@@ -1023,22 +1042,28 @@ class EventCreationHandler:
# Note: We have to cache event ID -> prev state group, as we don't
# store that in the DB.
#
# Note: We always set the state group -> joined hosts cache, even if
# we already set it, so that the expiry time is reset.
# Note: We set the state group -> joined hosts cache if it hasn't been
# set for a while, so that the expiry time is reset.
state_entry = await self.state.resolve_state_groups_for_events(
event.room_id, event_ids=event.prev_event_ids()
)
if state_entry.state_group:
joined_hosts = await self.store.get_joined_hosts(event.room_id, state_entry)
await self._external_cache.set(
"event_to_prev_state_group",
event.event_id,
state_entry.state_group,
expiry_ms=60 * 60 * 1000,
)
if state_entry.state_group in self._external_cache_joined_hosts_updates:
return
joined_hosts = await self.store.get_joined_hosts(event.room_id, state_entry)
# Note that the expiry times must be larger than the expiry time in
# _external_cache_joined_hosts_updates.
await self._external_cache.set(
"get_joined_hosts",
str(state_entry.state_group),
@@ -1046,6 +1071,8 @@ class EventCreationHandler:
expiry_ms=60 * 60 * 1000,
)
self._external_cache_joined_hosts_updates[state_entry.state_group] = None
async def _validate_canonical_alias(
self, directory_handler, room_alias_str: str, expected_room_id: str
) -> None:

View File

@@ -1212,7 +1212,16 @@ class PresenceHandler(BasePresenceHandler):
max_pos, deltas = await self.store.get_current_state_deltas(
self._event_pos, room_max_stream_ordering
)
await self._handle_state_delta(deltas)
# We may get multiple deltas for different rooms, but we want to
# handle them on a room by room basis, so we batch them up by
# room.
deltas_by_room: Dict[str, List[JsonDict]] = {}
for delta in deltas:
deltas_by_room.setdefault(delta["room_id"], []).append(delta)
for room_id, deltas_for_room in deltas_by_room.items():
await self._handle_state_delta(room_id, deltas_for_room)
self._event_pos = max_pos
@@ -1221,17 +1230,21 @@ class PresenceHandler(BasePresenceHandler):
max_pos
)
async def _handle_state_delta(self, deltas: List[JsonDict]) -> None:
"""Process current state deltas to find new joins that need to be
handled.
async def _handle_state_delta(self, room_id: str, deltas: List[JsonDict]) -> None:
"""Process current state deltas for the room to find new joins that need
to be handled.
"""
# A map of destination to a set of user state that they should receive
presence_destinations = {} # type: Dict[str, Set[UserPresenceState]]
# Sets of newly joined users. Note that if the local server is
# joining a remote room for the first time we'll see both the joining
# user and all remote users as newly joined.
newly_joined_users = set()
for delta in deltas:
assert room_id == delta["room_id"]
typ = delta["type"]
state_key = delta["state_key"]
room_id = delta["room_id"]
event_id = delta["event_id"]
prev_event_id = delta["prev_event_id"]
@@ -1260,72 +1273,55 @@ class PresenceHandler(BasePresenceHandler):
# Ignore changes to join events.
continue
# Retrieve any user presence state updates that need to be sent as a result,
# and the destinations that need to receive it
destinations, user_presence_states = await self._on_user_joined_room(
room_id, state_key
)
newly_joined_users.add(state_key)
# Insert the destinations and respective updates into our destinations dict
for destination in destinations:
presence_destinations.setdefault(destination, set()).update(
user_presence_states
)
if not newly_joined_users:
# If nobody has joined then there's nothing to do.
return
# Send out user presence updates for each destination
for destination, user_state_set in presence_destinations.items():
self._federation_queue.send_presence_to_destinations(
destinations=[destination], states=user_state_set
)
# We want to send:
# 1. presence states of all local users in the room to newly joined
# remote servers
# 2. presence states of newly joined users to all remote servers in
# the room.
#
# TODO: Only send presence states to remote hosts that don't already
# have them (because they already share rooms).
async def _on_user_joined_room(
self, room_id: str, user_id: str
) -> Tuple[List[str], List[UserPresenceState]]:
"""Called when we detect a user joining the room via the current state
delta stream. Returns the destinations that need to be updated and the
presence updates to send to them.
# Get all the users who were already in the room, by fetching the
# current users in the room and removing the newly joined users.
users = await self.store.get_users_in_room(room_id)
prev_users = set(users) - newly_joined_users
Args:
room_id: The ID of the room that the user has joined.
user_id: The ID of the user that has joined the room.
# Construct sets for all the local users and remote hosts that were
# already in the room
prev_local_users = []
prev_remote_hosts = set()
for user_id in prev_users:
if self.is_mine_id(user_id):
prev_local_users.append(user_id)
else:
prev_remote_hosts.add(get_domain_from_id(user_id))
Returns:
A tuple of destinations and presence updates to send to them.
"""
if self.is_mine_id(user_id):
# If this is a local user then we need to send their presence
# out to hosts in the room (who don't already have it)
# Similarly, construct sets for all the local users and remote hosts
# that were *not* already in the room. Care needs to be taken when
# calculating the remote hosts, as a host may have already been in the
# room even if there is a newly joined user from that host.
newly_joined_local_users = []
newly_joined_remote_hosts = set()
for user_id in newly_joined_users:
if self.is_mine_id(user_id):
newly_joined_local_users.append(user_id)
else:
host = get_domain_from_id(user_id)
if host not in prev_remote_hosts:
newly_joined_remote_hosts.add(host)
# TODO: We should be able to filter the hosts down to those that
# haven't previously seen the user
remote_hosts = await self.state.get_current_hosts_in_room(room_id)
# Filter out ourselves.
filtered_remote_hosts = [
host for host in remote_hosts if host != self.server_name
]
state = await self.current_state_for_user(user_id)
return filtered_remote_hosts, [state]
else:
# A remote user has joined the room, so we need to:
# 1. Check if this is a new server in the room
# 2. If so send any presence they don't already have for
# local users in the room.
# TODO: We should be able to filter the users down to those that
# the server hasn't previously seen
# TODO: Check that this is actually a new server joining the
# room.
remote_host = get_domain_from_id(user_id)
users = await self.state.get_current_users_in_room(room_id)
user_ids = list(filter(self.is_mine_id, users))
states_d = await self.current_state_for_users(user_ids)
# Send presence states of all local users in the room to newly joined
# remote servers. (We actually only send states for local users already
# in the room, as we'll send states for newly joined local users below.)
if prev_local_users and newly_joined_remote_hosts:
local_states = await self.current_state_for_users(prev_local_users)
# Filter out old presence, i.e. offline presence states where
# the user hasn't been active for a week. We can change this
@@ -1335,13 +1331,27 @@ class PresenceHandler(BasePresenceHandler):
now = self.clock.time_msec()
states = [
state
for state in states_d.values()
for state in local_states.values()
if state.state != PresenceState.OFFLINE
or now - state.last_active_ts < 7 * 24 * 60 * 60 * 1000
or state.status_msg is not None
]
return [remote_host], states
self._federation_queue.send_presence_to_destinations(
destinations=newly_joined_remote_hosts,
states=states,
)
# Send presence states of newly joined users to all remote servers in
# the room
if newly_joined_local_users and (
prev_remote_hosts or newly_joined_remote_hosts
):
local_states = await self.current_state_for_users(newly_joined_local_users)
self._federation_queue.send_presence_to_destinations(
destinations=prev_remote_hosts | newly_joined_remote_hosts,
states=list(local_states.values()),
)
def should_notify(old_state: UserPresenceState, new_state: UserPresenceState) -> bool:

View File

@@ -1327,7 +1327,7 @@ class RoomShutdownHandler:
new_room_id = None
logger.info("Shutting down room %r", room_id)
users = await self.state.get_current_users_in_room(room_id)
users = await self.store.get_users_in_room(room_id)
kicked_users = []
failed_to_kick_users = []
for user_id in users:

View File

@@ -14,6 +14,7 @@
import itertools
import logging
import re
from collections import deque
from typing import TYPE_CHECKING, Iterable, List, Optional, Sequence, Set, Tuple, cast
@@ -226,6 +227,23 @@ class SpaceSummaryHandler:
suggested_only: bool,
max_children: Optional[int],
) -> Tuple[Sequence[JsonDict], Sequence[JsonDict]]:
"""
Generate a room entry and a list of event entries for a given room.
Args:
requester: The requesting user, or None if this is over federation.
room_id: The room ID to summarize.
suggested_only: True if only suggested children should be returned.
Otherwise, all children are returned.
max_children: The maximum number of children to return for this node.
Returns:
A tuple of:
An iterable of a single value of the room.
An iterable of the sorted children events. This may be limited
to a maximum size or may include all children.
"""
if not await self._is_room_accessible(room_id, requester):
return (), ()
@@ -288,6 +306,7 @@ class SpaceSummaryHandler:
ev.data
for ev in res.events
if ev.event_type == EventTypes.MSC1772_SPACE_CHILD
or ev.event_type == EventTypes.SpaceChild
)
async def _is_room_accessible(self, room_id: str, requester: Optional[str]) -> bool:
@@ -331,7 +350,9 @@ class SpaceSummaryHandler:
)
# TODO: update once MSC1772 lands
room_type = create_event.content.get(EventContentFields.MSC1772_ROOM_TYPE)
room_type = create_event.content.get(EventContentFields.ROOM_TYPE)
if not room_type:
room_type = create_event.content.get(EventContentFields.MSC1772_ROOM_TYPE)
entry = {
"room_id": stats["room_id"],
@@ -344,6 +365,7 @@ class SpaceSummaryHandler:
stats["history_visibility"] == HistoryVisibility.WORLD_READABLE
),
"guest_can_join": stats["guest_access"] == "can_join",
"creation_ts": create_event.origin_server_ts,
"room_type": room_type,
}
@@ -353,6 +375,18 @@ class SpaceSummaryHandler:
return room_entry
async def _get_child_events(self, room_id: str) -> Iterable[EventBase]:
"""
Get the child events for a given room.
The returned results are sorted for stability.
Args:
room_id: The room id to get the children of.
Returns:
An iterable of sorted child events.
"""
# look for child rooms/spaces.
current_state_ids = await self._store.get_current_state_ids(room_id)
@@ -360,13 +394,15 @@ class SpaceSummaryHandler:
[
event_id
for key, event_id in current_state_ids.items()
# TODO: update once MSC1772 lands
# TODO: update once MSC1772 has been FCP for a period of time.
if key[0] == EventTypes.MSC1772_SPACE_CHILD
or key[0] == EventTypes.SpaceChild
]
)
# filter out any events without a "via" (which implies it has been redacted)
return (e for e in events if _has_valid_via(e))
# filter out any events without a "via" (which implies it has been redacted),
# and order to ensure we return stable results.
return sorted(filter(_has_valid_via, events), key=_child_events_comparison_key)
@attr.s(frozen=True, slots=True)
@@ -392,3 +428,39 @@ def _is_suggested_child_event(edge_event: EventBase) -> bool:
return True
logger.debug("Ignorning not-suggested child %s", edge_event.state_key)
return False
# Order may only contain characters in the range of \x20 (space) to \x7F (~).
_INVALID_ORDER_CHARS_RE = re.compile(r"[^\x20-\x7F]")
def _child_events_comparison_key(child: EventBase) -> Tuple[bool, Optional[str], str]:
"""
Generate a value for comparing two child events for ordering.
The rules for ordering are supposed to be:
1. The 'order' key, if it is valid.
2. The 'origin_server_ts' of the 'm.room.create' event.
3. The 'room_id'.
But we skip step 2 since we may not have any state from the room.
Args:
child: The event for generating a comparison key.
Returns:
The comparison key as a tuple of:
False if the ordering is valid.
The ordering field.
The room ID.
"""
order = child.content.get("order")
# If order is not a string or doesn't meet the requirements, ignore it.
if not isinstance(order, str):
order = None
elif len(order) > 50 or _INVALID_ORDER_CHARS_RE.search(order):
order = None
# Items without an order come last.
return (order is None, order, child.room_id)

View File

@@ -1190,7 +1190,7 @@ class SyncHandler:
# Step 1b, check for newly joined rooms
for room_id in newly_joined_rooms:
joined_users = await self.state.get_current_users_in_room(room_id)
joined_users = await self.store.get_users_in_room(room_id)
newly_joined_or_invited_users.update(joined_users)
# TODO: Check that these users are actually new, i.e. either they
@@ -1206,7 +1206,7 @@ class SyncHandler:
# Now find users that we no longer track
for room_id in newly_left_rooms:
left_users = await self.state.get_current_users_in_room(room_id)
left_users = await self.store.get_users_in_room(room_id)
newly_left_users.update(left_users)
# Remove any users that we still share a room with.
@@ -1361,7 +1361,7 @@ class SyncHandler:
extra_users_ids = set(newly_joined_or_invited_users)
for room_id in newly_joined_rooms:
users = await self.state.get_current_users_in_room(room_id)
users = await self.store.get_users_in_room(room_id)
extra_users_ids.update(users)
extra_users_ids.discard(user.to_string())

View File

@@ -12,8 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# These are imported to allow for nicer logging configuration files.
import logging
from synapse.logging._remote import RemoteHandler
from synapse.logging._terse_json import JsonFormatter, TerseJsonFormatter
# These are imported to allow for nicer logging configuration files.
__all__ = ["RemoteHandler", "JsonFormatter", "TerseJsonFormatter"]
# Debug logger for https://github.com/matrix-org/synapse/issues/9533 etc
issue9533_logger = logging.getLogger("synapse.9533_debug")

View File

@@ -535,6 +535,13 @@ class ReactorLastSeenMetric:
REGISTRY.register(ReactorLastSeenMetric())
# The minimum time in seconds between GCs for each generation, regardless of the current GC
# thresholds and counts.
MIN_TIME_BETWEEN_GCS = (1.0, 10.0, 30.0)
# The time (in seconds since the epoch) of the last time we did a GC for each generation.
_last_gc = [0.0, 0.0, 0.0]
def runUntilCurrentTimer(reactor, func):
@functools.wraps(func)
@@ -575,11 +582,16 @@ def runUntilCurrentTimer(reactor, func):
return ret
# Check if we need to do a manual GC (since its been disabled), and do
# one if necessary.
# one if necessary. Note we go in reverse order as e.g. a gen 1 GC may
# promote an object into gen 2, and we don't want to handle the same
# object multiple times.
threshold = gc.get_threshold()
counts = gc.get_count()
for i in (2, 1, 0):
if threshold[i] < counts[i]:
# We check if we need to do one based on a straightforward
# comparison between the threshold and count. We also do an extra
# check to make sure that we don't do a GC too often.
if threshold[i] < counts[i] and MIN_TIME_BETWEEN_GCS[i] < end - _last_gc[i]:
if i == 0:
logger.debug("Collecting gc %d", i)
else:
@@ -589,6 +601,8 @@ def runUntilCurrentTimer(reactor, func):
unreachable = gc.collect(i)
end = time.time()
_last_gc[i] = end
gc_time.labels(i).observe(end - start)
gc_unreachable.labels(i).set(unreachable)
@@ -615,6 +629,7 @@ try:
except AttributeError:
pass
__all__ = [
"MetricsResource",
"generate_latest",

196
synapse/metrics/jemalloc.py Normal file
View File

@@ -0,0 +1,196 @@
# Copyright 2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import ctypes
import logging
import os
import re
from typing import Optional
from synapse.metrics import REGISTRY, GaugeMetricFamily
logger = logging.getLogger(__name__)
def _setup_jemalloc_stats():
"""Checks to see if jemalloc is loaded, and hooks up a collector to record
statistics exposed by jemalloc.
"""
# Try to find the loaded jemalloc shared library, if any. We need to
# introspect into what is loaded, rather than loading whatever is on the
# path, as if we load a *different* jemalloc version things will seg fault.
# We look in `/proc/self/maps`, which only exists on linux.
if not os.path.exists("/proc/self/maps"):
logger.debug("Not looking for jemalloc as no /proc/self/maps exist")
return
# We're looking for a path at the end of the line that includes
# "libjemalloc".
regex = re.compile(r"/\S+/libjemalloc.*$")
jemalloc_path = None
with open("/proc/self/maps") as f:
for line in f:
match = regex.search(line.strip())
if match:
jemalloc_path = match.group()
if not jemalloc_path:
# No loaded jemalloc was found.
logger.debug("jemalloc not found")
return
logger.debug("Found jemalloc at %s", jemalloc_path)
jemalloc = ctypes.CDLL(jemalloc_path)
def _mallctl(
name: str, read: bool = True, write: Optional[int] = None
) -> Optional[int]:
"""Wrapper around `mallctl` for reading and writing integers to
jemalloc.
Args:
name: The name of the option to read from/write to.
read: Whether to try and read the value.
write: The value to write, if given.
Returns:
The value read if `read` is True, otherwise None.
Raises:
An exception if `mallctl` returns a non-zero error code.
"""
input_var = None
input_var_ref = None
input_len_ref = None
if read:
input_var = ctypes.c_size_t(0)
input_len = ctypes.c_size_t(ctypes.sizeof(input_var))
input_var_ref = ctypes.byref(input_var)
input_len_ref = ctypes.byref(input_len)
write_var_ref = None
write_len = ctypes.c_size_t(0)
if write is not None:
write_var = ctypes.c_size_t(write)
write_len = ctypes.c_size_t(ctypes.sizeof(write_var))
write_var_ref = ctypes.byref(write_var)
# The interface is:
#
# int mallctl(
# const char *name,
# void *oldp,
# size_t *oldlenp,
# void *newp,
# size_t newlen
# )
#
# Where oldp/oldlenp is a buffer where the old value will be written to
# (if not null), and newp/newlen is the buffer with the new value to set
# (if not null). Note that they're all references *except* newlen.
result = jemalloc.mallctl(
name.encode("ascii"),
input_var_ref,
input_len_ref,
write_var_ref,
write_len,
)
if result != 0:
raise Exception("Failed to call mallctl")
if input_var is None:
return None
return input_var.value
def _jemalloc_refresh_stats() -> None:
"""Request that jemalloc updates its internal statistics. This needs to
be called before querying for stats, otherwise it will return stale
values.
"""
try:
_mallctl("epoch", read=False, write=1)
except Exception as e:
logger.warning("Failed to reload jemalloc stats: %s", e)
class JemallocCollector:
"""Metrics for internal jemalloc stats."""
def collect(self):
_jemalloc_refresh_stats()
g = GaugeMetricFamily(
"jemalloc_stats_app_memory_bytes",
"The stats reported by jemalloc",
labels=["type"],
)
# Read the relevant global stats from jemalloc. Note that these may
# not be accurate if python is configured to use its internal small
# object allocator (which is on by default, disable by setting the
# env `PYTHONMALLOC=malloc`).
#
# See the jemalloc manpage for details about what each value means,
# roughly:
# - allocated ─ Total number of bytes allocated by the app
# - active ─ Total number of bytes in active pages allocated by
# the application, this is bigger than `allocated`.
# - resident ─ Maximum number of bytes in physically resident data
# pages mapped by the allocator, comprising all pages dedicated
# to allocator metadata, pages backing active allocations, and
# unused dirty pages. This is bigger than `active`.
# - mapped ─ Total number of bytes in active extents mapped by the
# allocator.
# - metadata ─ Total number of bytes dedicated to jemalloc
# metadata.
for t in (
"allocated",
"active",
"resident",
"mapped",
"metadata",
):
try:
value = _mallctl(f"stats.{t}")
except Exception as e:
# There was an error fetching the value, skip.
logger.warning("Failed to read jemalloc stats.%s: %s", t, e)
continue
g.add_metric([t], value=value)
yield g
REGISTRY.register(JemallocCollector())
logger.debug("Added jemalloc stats")
def setup_jemalloc_stats():
"""Try to setup jemalloc stats, if jemalloc is loaded."""
try:
_setup_jemalloc_stats()
except Exception as e:
# This should only happen if we find the loaded jemalloc library, but
# fail to load it somehow (e.g. we somehow picked the wrong version).
logger.info("Failed to setup collector to record jemalloc stats: %s", e)
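Illustrative usage, not part of the diff: the collector only reports anything when the process is already running under jemalloc, e.g. via LD_PRELOAD (the library path below is an example):
# LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libjemalloc.so.2 python -m synapse.app.homeserver ...
from synapse.metrics.jemalloc import setup_jemalloc_stats

setup_jemalloc_stats()  # quietly does nothing if jemalloc is not the loaded allocator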

View File

@@ -38,6 +38,7 @@ from synapse.api.constants import EventTypes, HistoryVisibility, Membership
from synapse.api.errors import AuthError
from synapse.events import EventBase
from synapse.handlers.presence import format_user_presence_state
from synapse.logging import issue9533_logger
from synapse.logging.context import PreserveLoggingContext
from synapse.logging.opentracing import log_kv, start_active_span
from synapse.logging.utils import log_function
@@ -426,6 +427,13 @@ class Notifier:
for room in rooms:
user_streams |= self.room_to_user_streams.get(room, set())
if stream_key == "to_device_key":
issue9533_logger.debug(
"to-device messages stream id %s, awaking streams for %s",
new_token,
users,
)
time_now_ms = self.clock.time_msec()
for user_stream in user_streams:
try:

View File

@@ -19,6 +19,7 @@ from typing import Any, Dict, List, Optional, Pattern, Tuple, Union
from synapse.events import EventBase
from synapse.types import UserID
from synapse.util import glob_to_regex, re_word_boundary
from synapse.util.caches.lrucache import LruCache
logger = logging.getLogger(__name__)
@@ -183,7 +184,7 @@ class PushRuleEvaluatorForEvent:
r = regex_cache.get((display_name, False, True), None)
if not r:
r1 = re.escape(display_name)
r1 = _re_word_boundary(r1)
r1 = re_word_boundary(r1)
r = re.compile(r1, flags=re.IGNORECASE)
regex_cache[(display_name, False, True)] = r
@@ -212,7 +213,7 @@ def _glob_matches(glob: str, value: str, word_boundary: bool = False) -> bool:
try:
r = regex_cache.get((glob, True, word_boundary), None)
if not r:
r = _glob_to_re(glob, word_boundary)
r = glob_to_regex(glob, word_boundary)
regex_cache[(glob, True, word_boundary)] = r
return bool(r.search(value))
except re.error:
@@ -220,56 +221,6 @@ def _glob_matches(glob: str, value: str, word_boundary: bool = False) -> bool:
return False
def _glob_to_re(glob: str, word_boundary: bool) -> Pattern:
"""Generates regex for a given glob.
Args:
glob
word_boundary: Whether to match against word boundaries or entire string.
"""
if IS_GLOB.search(glob):
r = re.escape(glob)
r = r.replace(r"\*", ".*?")
r = r.replace(r"\?", ".")
# handle [abc], [a-z] and [!a-z] style ranges.
r = GLOB_REGEX.sub(
lambda x: (
"[%s%s]" % (x.group(1) and "^" or "", x.group(2).replace(r"\\\-", "-"))
),
r,
)
if word_boundary:
r = _re_word_boundary(r)
return re.compile(r, flags=re.IGNORECASE)
else:
r = "^" + r + "$"
return re.compile(r, flags=re.IGNORECASE)
elif word_boundary:
r = re.escape(glob)
r = _re_word_boundary(r)
return re.compile(r, flags=re.IGNORECASE)
else:
r = "^" + re.escape(glob) + "$"
return re.compile(r, flags=re.IGNORECASE)
def _re_word_boundary(r: str) -> str:
"""
Adds word boundary characters to the start and end of an
expression to require that the match occur as a whole word,
but do so respecting the fact that strings starting or ending
with non-word characters will change word boundaries.
"""
# we can't use \b as it chokes on unicode. however \W seems to be okay
# as shorthand for [^0-9A-Za-z_].
return r"(^|\W)%s(\W|$)" % (r,)
def _flatten_dict(
d: Union[EventBase, dict],
prefix: Optional[List[str]] = None,
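Illustrative sketch of the shared helpers this file now uses in place of the removed _glob_to_re/_re_word_boundary; the behaviour is inferred from the removed code above and the new call sites, so treat it as an approximation:
import re
from synapse.util import glob_to_regex, re_word_boundary

# glob matching against whole words, as _glob_matches now does
r = glob_to_regex("alice*", True)
print(bool(r.search("hey alice!")))        # expected: True

# wrap an escaped display name in word boundaries, as the display-name check now does
print(re_word_boundary(re.escape("bob")))  # roughly (^|\W)bob(\W|$)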

View File

@@ -78,7 +78,8 @@ REQUIREMENTS = [
# we use attr.validators.deep_iterable, which arrived in 19.1.0 (Note:
# Fedora 31 only has 19.1, so if we want to upgrade we should wait until 33
# is out in November.)
"attrs>=19.1.0",
# Note: 21.1.0 broke `/sync`, see #9936
"attrs>=19.1.0,!=21.1.0",
"netaddr>=0.7.18",
"Jinja2>=2.9",
"bleach>=1.4.3",
@@ -116,6 +117,8 @@ CONDITIONAL_REQUIREMENTS = {
# hiredis is not a *strict* dependency, but it makes things much faster.
# (if it is not installed, we fall back to slow code.)
"redis": ["txredisapi>=1.4.7", "hiredis"],
# Required to use experimental `caches.track_memory_usage` config option.
"cache_memory": ["pympler"],
}
ALL_OPTIONAL_REQUIREMENTS = set() # type: Set[str]
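A quick, illustrative check of what the new pin allows, using the packaging library (not part of this change):
from packaging.specifiers import SpecifierSet

spec = SpecifierSet(">=19.1.0,!=21.1.0")
print("20.3.0" in spec)  # True  - existing releases still satisfy the pin
print("21.1.0" in spec)  # False - the release that broke /sync is excluded
print("21.2.0" in spec)  # True  - later releases are not excluded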

View File

@@ -51,7 +51,6 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
# How long we allow callers to wait for replication updates before timing out.
_WAIT_FOR_REPLICATION_TIMEOUT_SECONDS = 30

View File

@@ -15,7 +15,7 @@
import logging
from typing import TYPE_CHECKING, Any, Optional
from prometheus_client import Counter
from prometheus_client import Counter, Histogram
from synapse.logging.context import make_deferred_yieldable
from synapse.util import json_decoder, json_encoder
@@ -35,6 +35,20 @@ get_counter = Counter(
labelnames=["cache_name", "hit"],
)
response_timer = Histogram(
"synapse_external_cache_response_time_seconds",
"Time taken to get a response from Redis for a cache get/set request",
labelnames=["method"],
buckets=(
0.001,
0.002,
0.005,
0.01,
0.02,
0.05,
),
)
logger = logging.getLogger(__name__)
@@ -72,13 +86,14 @@ class ExternalCache:
logger.debug("Caching %s %s: %r", cache_name, key, encoded_value)
return await make_deferred_yieldable(
self._redis_connection.set(
self._get_redis_key(cache_name, key),
encoded_value,
pexpire=expiry_ms,
with response_timer.labels("set").time():
return await make_deferred_yieldable(
self._redis_connection.set(
self._get_redis_key(cache_name, key),
encoded_value,
pexpire=expiry_ms,
)
)
)
async def get(self, cache_name: str, key: str) -> Optional[Any]:
"""Look up a key/value in the named cache."""
@@ -86,9 +101,10 @@ class ExternalCache:
if self._redis_connection is None:
return None
result = await make_deferred_yieldable(
self._redis_connection.get(self._get_redis_key(cache_name, key))
)
with response_timer.labels("get").time():
result = await make_deferred_yieldable(
self._redis_connection.get(self._get_redis_key(cache_name, key))
)
logger.debug("Got cache result %s %s: %r", cache_name, key, result)
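Minimal sketch of the timing pattern introduced above; the metric name, buckets and the timed call are placeholders:
import time
from prometheus_client import Histogram

timer = Histogram(
    "example_response_time_seconds",
    "Example request timer",
    labelnames=["method"],
    buckets=(0.001, 0.002, 0.005, 0.01, 0.02, 0.05),
)

with timer.labels("get").time():  # the elapsed time is observed when the block exits
    time.sleep(0.01)              # stand-in for the Redis round trip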

View File

@@ -1020,6 +1020,7 @@ class RoomSpaceSummaryRestServlet(RestServlet):
max_rooms_per_space=parse_integer(request, "max_rooms_per_space"),
)
# TODO When switching to the stable endpoint, remove the POST handler.
async def on_POST(
self, request: SynapseRequest, room_id: str
) -> Tuple[int, JsonDict]:

View File

@@ -213,19 +213,23 @@ class StateHandler:
return ret.state
async def get_current_users_in_room(
self, room_id: str, latest_event_ids: Optional[List[str]] = None
self, room_id: str, latest_event_ids: List[str]
) -> Dict[str, ProfileInfo]:
"""
Get the users who are currently in a room.
Note: This is much slower than using the equivalent method
`DataStore.get_users_in_room` or `DataStore.get_users_in_room_with_profiles`,
so this should only be used when wanting the users at a particular point
in the room.
Args:
room_id: The ID of the room.
latest_event_ids: Precomputed list of latest event IDs. Will be computed if None.
Returns:
Dictionary of user IDs to their profileinfo.
"""
if not latest_event_ids:
latest_event_ids = await self.store.get_latest_event_ids_in_room(room_id)
assert latest_event_ids is not None
logger.debug("calling resolve_state_groups from get_current_users_in_room")
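Sketch of a caller after this change, now that latest_event_ids is required; hs is assumed to be a HomeServer and the accessor names follow the usual Synapse conventions rather than anything shown in this hunk:
async def users_at_current_state(hs, room_id: str):
    store = hs.get_datastore()
    state_handler = hs.get_state_handler()
    # callers must now compute the forward extremities themselves
    latest_event_ids = await store.get_latest_event_ids_in_room(room_id)
    return await state_handler.get_current_users_in_room(room_id, latest_event_ids)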

View File

@@ -69,6 +69,7 @@ class SQLBaseStore(metaclass=ABCMeta):
self._attempt_to_invalidate_cache("is_host_joined", (room_id, host))
self._attempt_to_invalidate_cache("get_users_in_room", (room_id,))
self._attempt_to_invalidate_cache("get_users_in_room_with_profiles", (room_id,))
self._attempt_to_invalidate_cache("get_room_summary", (room_id,))
self._attempt_to_invalidate_cache("get_current_state_ids", (room_id,))

View File

@@ -15,6 +15,7 @@
import logging
from typing import List, Optional, Tuple
from synapse.logging import issue9533_logger
from synapse.logging.opentracing import log_kv, set_tag, trace
from synapse.replication.tcp.streams import ToDeviceStream
from synapse.storage._base import SQLBaseStore, db_to_json
@@ -404,6 +405,13 @@ class DeviceInboxWorkerStore(SQLBaseStore):
],
)
if remote_messages_by_destination:
issue9533_logger.debug(
"Queued outgoing to-device messages with stream_id %i for %s",
stream_id,
list(remote_messages_by_destination.keys()),
)
async with self._device_inbox_id_gen.get_next() as stream_id:
now_ms = self.clock.time_msec()
await self.db_pool.runInteraction(
@@ -533,6 +541,16 @@ class DeviceInboxWorkerStore(SQLBaseStore):
],
)
issue9533_logger.debug(
"Stored to-device messages with stream_id %i for %s",
stream_id,
[
(user_id, device_id)
for (user_id, messages_by_device) in local_by_user_then_device.items()
for device_id in messages_by_device.keys()
],
)
class DeviceInboxBackgroundUpdateStore(SQLBaseStore):
DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop"

View File

@@ -84,7 +84,9 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore):
if keys:
result["keys"] = keys
device_display_name = device.display_name
device_display_name = None
if self.hs.config.allow_device_name_lookup_over_federation:
device_display_name = device.display_name
if device_display_name:
result["device_display_name"] = device_display_name

View File

@@ -205,8 +205,12 @@ class RoomMemberWorkerStore(EventsWorkerStore):
def _get_users_in_room_with_profiles(txn) -> Dict[str, ProfileInfo]:
sql = """
SELECT user_id, display_name, avatar_url FROM room_memberships
WHERE room_id = ? AND membership = ?
SELECT state_key, display_name, avatar_url FROM room_memberships as m
INNER JOIN current_state_events as c
ON m.event_id = c.event_id
AND m.room_id = c.room_id
AND m.user_id = c.state_key
WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ?
"""
txn.execute(sql, (room_id, Membership.JOIN))

View File

@@ -1,21 +0,0 @@
# Synapse Database Schemas
These schemas are used as a basis to create brand new Synapse databases, on both
SQLite3 and Postgres.
## Building full schema dumps
If you want to recreate these schemas, they need to be made from a database that
has had all background updates run.
To do so, use `scripts-dev/make_full_schema.sh`. This will produce new
`full.sql.postgres ` and `full.sql.sqlite` files.
Ensure postgres is installed and your user has the ability to run bash commands
such as `createdb`, then call
./scripts-dev/make_full_schema.sh -p postgres_username -o output_dir/
There are currently two folders with full-schema snapshots. `16` is a snapshot
from 2015, for historical reference. The other contains the most recent full
schema snapshot.

View File

@@ -142,8 +142,6 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
batch_size (int): Maximum number of state events to process
per cycle.
"""
state = self.hs.get_state_handler()
# If we don't have progress filed, delete everything.
if not progress:
await self.delete_all_from_user_dir()
@@ -197,7 +195,7 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
room_id
)
users_with_profile = await state.get_current_users_in_room(room_id)
users_with_profile = await self.get_users_in_room_with_profiles(room_id)
user_ids = set(users_with_profile)
# Update each user in the user directory.

View File

@@ -26,16 +26,13 @@ from synapse.config.homeserver import HomeServerConfig
from synapse.storage.database import LoggingDatabaseConnection
from synapse.storage.engines import BaseDatabaseEngine
from synapse.storage.engines.postgres import PostgresEngine
from synapse.storage.schema import SCHEMA_VERSION
from synapse.storage.types import Cursor
logger = logging.getLogger(__name__)
# Remember to update this number every time a change is made to database
# schema files, so the users will be informed on server restarts.
SCHEMA_VERSION = 59
dir_path = os.path.abspath(os.path.dirname(__file__))
schema_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "schema")
class PrepareDatabaseException(Exception):
@@ -167,7 +164,14 @@ def _setup_new_database(
Example directory structure:
schema/
schema/
common/
delta/
...
full_schemas/
11/
foo.sql
main/
delta/
...
full_schemas/
@@ -175,15 +179,14 @@ def _setup_new_database(
test.sql
...
11/
foo.sql
bar.sql
...
In the example foo.sql and bar.sql would be run, and then any delta files
for versions strictly greater than 11.
Note: we apply the full schemas and deltas from the top level `schema/`
folder as well those in the data stores specified.
Note: we apply the full schemas and deltas from the `schema/common`
folder as well as those in the databases specified.
Args:
cur: a database cursor
@@ -195,12 +198,12 @@ def _setup_new_database(
# configured to our liking.
database_engine.check_new_database(cur)
current_dir = os.path.join(dir_path, "schema", "full_schemas")
full_schemas_dir = os.path.join(schema_path, "common", "full_schemas")
# First we find the highest full schema version we have
valid_versions = []
for filename in os.listdir(current_dir):
for filename in os.listdir(full_schemas_dir):
try:
ver = int(filename)
except ValueError:
@@ -218,15 +221,13 @@ def _setup_new_database(
logger.debug("Initialising schema v%d", max_current_ver)
# Now lets find all the full schema files, both in the global schema and
# in data store schemas.
directories = [os.path.join(current_dir, str(max_current_ver))]
# Now let's find all the full schema files, both in the common schema and
# in database schemas.
directories = [os.path.join(full_schemas_dir, str(max_current_ver))]
directories.extend(
os.path.join(
dir_path,
"databases",
schema_path,
database,
"schema",
"full_schemas",
str(max_current_ver),
)
@@ -357,6 +358,9 @@ def _upgrade_existing_database(
check_database_before_upgrade(cur, database_engine, config)
start_ver = current_version
# if we got to this schema version by running a full_schema rather than a series
# of deltas, we should not run the deltas for this version.
if not upgraded:
start_ver += 1
@@ -385,12 +389,10 @@ def _upgrade_existing_database(
# directories for schema updates.
# First we find the directories to search in
delta_dir = os.path.join(dir_path, "schema", "delta", str(v))
delta_dir = os.path.join(schema_path, "common", "delta", str(v))
directories = [delta_dir]
for database in databases:
directories.append(
os.path.join(dir_path, "databases", database, "schema", "delta", str(v))
)
directories.append(os.path.join(schema_path, database, "delta", str(v)))
# Used to check if we have any duplicate file names
file_name_counter = Counter() # type: CounterType[str]
@@ -621,8 +623,8 @@ def _get_or_create_schema_state(
txn: Cursor, database_engine: BaseDatabaseEngine
) -> Optional[Tuple[int, List[str], bool]]:
# Bluntly try creating the schema_version tables.
schema_path = os.path.join(dir_path, "schema", "schema_version.sql")
executescript(txn, schema_path)
sql_path = os.path.join(schema_path, "common", "schema_version.sql")
executescript(txn, sql_path)
txn.execute("SELECT version, upgraded FROM schema_version")
row = txn.fetchone()

View File

@@ -0,0 +1,37 @@
# Synapse Database Schemas
This directory contains the schema files used to build Synapse databases.
Synapse supports splitting its datastore across multiple physical databases (which can
be useful for large installations), and the schema files are therefore split according
to the logical database they apply to.
At the time of writing, the following "logical" databases are supported:
* `state` - used to store Matrix room state (more specifically, `state_groups`,
their relationships and contents.)
* `main` - stores everything else.
Additionally, the `common` directory contains schema files for tables which must be
present on *all* physical databases.
## Full schema dumps
In the `full_schemas` directories, only the most recently-numbered snapshot is useful
(`54` at the time of writing). Older snapshots (eg, `16`) are present for historical
reference only.
## Building full schema dumps
If you want to recreate these schemas, they need to be made from a database that
has had all background updates run.
To do so, use `scripts-dev/make_full_schema.sh`. This will produce new
`full.sql.postgres` and `full.sql.sqlite` files.
Ensure postgres is installed, then run:
./scripts-dev/make_full_schema.sh -p postgres_username -o output_dir/
NB at the time of writing, this script predates the split into separate `state`/`main`
databases so will require updates to handle that correctly.
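For orientation, the layout this README describes looks roughly like the following; directory names come from the text above, file names are examples:
synapse/storage/schema/
    common/            # applied to every physical database
        schema_version.sql
        delta/ ...
        full_schemas/ ...
    main/              # everything except room state
        delta/ ...
        full_schemas/ ...
    state/             # state_groups, their relationships and contents
        delta/ ...
        full_schemas/ ...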

View File

@@ -0,0 +1,17 @@
# Copyright 2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Remember to update this number every time a change is made to database
# schema files, so the users will be informed on server restarts.
SCHEMA_VERSION = 59

Some files were not shown because too many files have changed in this diff.