Compare commits
25 Commits
| Author | SHA1 | Date |
|---|---|---|
|  | 39d98249f4 |  |
|  | e5afd0ab99 |  |
|  | 8f2a3cbb70 |  |
|  | 882911a863 |  |
|  | 9b1f99ba6b |  |
|  | 1bc4feb6c9 |  |
|  | 96bcc5d902 |  |
|  | ec9224bf9a |  |
|  | b6aef59334 |  |
|  | f11fe931f5 |  |
|  | 827f198177 |  |
|  | a5fb382a29 |  |
|  | 5ab7146e19 |  |
|  | 63e25010d6 |  |
|  | 25006acc17 |  |
|  | f75a041f59 |  |
|  | eee26138fe |  |
|  | 099b69fb1c |  |
|  | 1870b44d23 |  |
|  | 2cfa6a3001 |  |
|  | 14d8d41658 |  |
|  | 3d70cc393f |  |
|  | 66fc166b96 |  |
|  | afb216c202 |  |
|  | b0a0fb5c97 |  |
```diff
@@ -10,6 +10,7 @@ on:
 
 permissions:
   contents: read
+  packages: write
 
 jobs:
   build:
@@ -34,11 +35,20 @@ jobs:
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}
 
+      - name: Log in to GHCR
+        uses: docker/login-action@v2
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
       - name: Calculate docker image tag
         id: set-tag
         uses: docker/metadata-action@master
         with:
-          images: matrixdotorg/synapse
+          images: |
+            docker.io/matrixdotorg/synapse
+            ghcr.io/matrix-org/synapse
           flavor: |
             latest=false
           tags: |
```
```diff
@@ -5,6 +5,13 @@ on:
     - cron: 0 8 * * *
 
   workflow_dispatch:
+    inputs:
+      twisted_ref:
+        description: Commit, branch or tag to checkout from upstream Twisted.
+        required: false
+        default: 'trunk'
+        type: string
+
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
@@ -29,7 +36,7 @@ jobs:
           extras: "all"
       - run: |
           poetry remove twisted
-          poetry add --extras tls git+https://github.com/twisted/twisted.git#trunk
+          poetry add --extras tls git+https://github.com/twisted/twisted.git#${{ inputs.twisted_ref }}
           poetry install --no-interaction --extras "all test"
       - name: Remove warn_unused_ignores from mypy config
         run: sed '/warn_unused_ignores = True/d' -i mypy.ini
```
Generated (+24 -13)

```diff
@@ -13,9 +13,9 @@ dependencies = [
 
 [[package]]
 name = "anyhow"
-version = "1.0.69"
+version = "1.0.70"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800"
+checksum = "7de8ce5e0f9f8d88245311066a578d72b7af3e7088f32783804676302df237e4"
 
 [[package]]
 name = "arc-swap"
@@ -185,9 +185,9 @@ dependencies = [
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.46"
+version = "1.0.52"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "94e2ef8dbfc347b10c094890f778ee2e36ca9bb4262e86dc99cd217e35f3470b"
+checksum = "1d0e1ae9e836cc3beddd63db0df682593d7e2d3d891ae8c9083d2113e1744224"
 dependencies = [
  "unicode-ident",
 ]
@@ -250,7 +250,7 @@ dependencies = [
  "proc-macro2",
  "pyo3-macros-backend",
  "quote",
- "syn",
+ "syn 1.0.104",
 ]
 
 [[package]]
@@ -261,7 +261,7 @@ checksum = "c8df9be978a2d2f0cdebabb03206ed73b11314701a5bfe71b0d753b81997777f"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 1.0.104",
 ]
 
 [[package]]
@@ -276,9 +276,9 @@ dependencies = [
 
 [[package]]
 name = "quote"
-version = "1.0.21"
+version = "1.0.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
+checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc"
 dependencies = [
  "proc-macro2",
 ]
@@ -323,22 +323,22 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
 
 [[package]]
 name = "serde"
-version = "1.0.155"
+version = "1.0.157"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "71f2b4817415c6d4210bfe1c7bfcf4801b2d904cb4d0e1a8fdb651013c9e86b8"
+checksum = "707de5fcf5df2b5788fca98dd7eab490bc2fd9b7ef1404defc462833b83f25ca"
 dependencies = [
  "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.155"
+version = "1.0.157"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d071a94a3fac4aff69d023a7f411e33f40f3483f8c5190b1953822b6b76d7630"
+checksum = "78997f4555c22a7971214540c4a661291970619afd56de19f77e0de86296e1e5"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.2",
 ]
 
 [[package]]
@@ -375,6 +375,17 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "syn"
+version = "2.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "59d3276aee1fa0c33612917969b5172b5be2db051232a6e4826f1a1a9191b045"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
 [[package]]
 name = "synapse"
 version = "0.1.0"
```
```diff
@@ -0,0 +1 @@
+Add `Synapse-Trace-Id` to `access-control-expose-headers` header.
@@ -0,0 +1 @@
+Fix long-standing error when sending message into deleted room.
@@ -0,0 +1 @@
+Implement [MSC2659](https://github.com/matrix-org/matrix-spec-proposals/pull/2659): application service ping endpoint. Contributed by Tulir @ Beeper.
@@ -0,0 +1 @@
+Allow loading `/register/available` endpoint on workers.
@@ -0,0 +1 @@
+Reorganize URL preview code.
@@ -1 +1 @@
-Remove unused class `DirectTcpReplicationClientFactory`.
+Clean-up direct TCP replication code.
@@ -0,0 +1 @@
+Clean-up direct TCP replication code.
@@ -0,0 +1 @@
+Make `configure_workers_and_start` script used in Complement tests compatible with older versions of Python.
@@ -0,0 +1 @@
+Mirror images to the GitHub Container Registry (`ghcr.io/matrix-org/synapse`).
@@ -0,0 +1 @@
+Mirror images to the GitHub Container Registry (`ghcr.io/matrix-org/synapse`).
@@ -0,0 +1 @@
+Bump pydantic from 1.10.4 to 1.10.6.
@@ -0,0 +1 @@
+Bump serde from 1.0.155 to 1.0.157.
@@ -0,0 +1 @@
+Bump anyhow from 1.0.69 to 1.0.70.
@@ -0,0 +1 @@
+Bump txredisapi from 1.4.7 to 1.4.9.
@@ -0,0 +1 @@
+Bump pygithub from 1.57 to 1.58.1.
@@ -0,0 +1 @@
+Bump types-requests from 2.28.11.12 to 2.28.11.15.
@@ -0,0 +1 @@
+Add a `/versions` flag for [MSC3952](https://github.com/matrix-org/matrix-spec-proposals/pull/3952).
@@ -0,0 +1 @@
+Fix a long-standing bug where edits of non-`m.room.message` events would not be correctly bundled.
@@ -0,0 +1 @@
+Fix a bug in which the [`POST /_matrix/client/v3/rooms/{roomId}/report/{eventId}`](https://spec.matrix.org/v1.6/client-server-api/#post_matrixclientv3roomsroomidreporteventid) endpoint would return the wrong error if the user did not have permission to view the event. This aligns Synapse's implementation with [MSC2249](https://github.com/matrix-org/matrix-spec-proposals/pull/2249).
@@ -0,0 +1 @@
+Fix a bug in which the [`POST /_matrix/client/v3/rooms/{roomId}/report/{eventId}`](https://spec.matrix.org/v1.6/client-server-api/#post_matrixclientv3roomsroomidreporteventid) endpoint would return the wrong error if the user did not have permission to view the event. This aligns Synapse's implementation with [MSC2249](https://github.com/matrix-org/matrix-spec-proposals/pull/2249).
@@ -0,0 +1,3 @@
+Fix a bug introduced in Synapse 1.75.0rc1 where the [SQLite port_db script](https://matrix-org.github.io/synapse/latest/postgres.html#porting-from-sqlite)
+would fail to open the SQLite database.
+
@@ -0,0 +1 @@
+Allow running the Twisted trunk job against other branches.
@@ -0,0 +1 @@
+Remind the releaser to ask for changelog feedback in [#synapse-dev](https://matrix.to/#/#synapse-dev:matrix.org).
@@ -0,0 +1 @@
+Make EventBase unhashable.
```
```diff
@@ -163,6 +163,7 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = {
         "^/_matrix/client/versions$",
         "^/_matrix/client/(api/v1|r0|v3|unstable)/voip/turnServer$",
         "^/_matrix/client/(r0|v3|unstable)/register$",
+        "^/_matrix/client/(r0|v3|unstable)/register/available$",
         "^/_matrix/client/(r0|v3|unstable)/auth/.*/fallback/web$",
         "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/messages$",
         "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/event",
@@ -422,7 +423,7 @@ def add_worker_roles_to_shared_config(
 
 
 def merge_worker_template_configs(
-    existing_dict: Dict[str, Any] | None,
+    existing_dict: Optional[Dict[str, Any]],
    to_be_merged_dict: Dict[str, Any],
 ) -> Dict[str, Any]:
     """When given an existing dict of worker template configuration consisting with both
```
```diff
@@ -26,8 +26,8 @@ for most users.
 #### Docker images and Ansible playbooks
 
 There is an official synapse image available at
-<https://hub.docker.com/r/matrixdotorg/synapse> which can be used with
-the docker-compose file available at
+<https://hub.docker.com/r/matrixdotorg/synapse> or at [`ghcr.io/matrix-org/synapse`](https://ghcr.io/matrix-org/synapse)
+which can be used with the docker-compose file available at
 [contrib/docker](https://github.com/matrix-org/synapse/tree/develop/contrib/docker).
 Further information on this including configuration options is available in the README
 on hub.docker.com.
```
````diff
@@ -88,6 +88,18 @@ process, for example:
     dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
 ```
 
+# Upgrading to v1.80.0
+
+## Reporting events error code change
+
+Before this update, the
+[`POST /_matrix/client/v3/rooms/{roomId}/report/{eventId}`](https://spec.matrix.org/v1.6/client-server-api/#post_matrixclientv3roomsroomidreporteventid)
+endpoint would return a `403` if a user attempted to report an event that they did not have access to.
+This endpoint will now return a `404` in this case instead.
+
+Clients that implement event reporting should check that their error handling code will handle this
+change.
+
 # Upgrading to v1.79.0
 
 ## The `on_threepid_bind` module callback method has been deprecated
````
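A hedged illustration to go with the upgrade note above (not part of this diff; the homeserver URL, token, and IDs are placeholders): a client that reports events should treat both the old `403` and the new `404` as "event not visible".

```python
import requests

def report_event(base_url: str, token: str, room_id: str, event_id: str) -> None:
    """Report an event, tolerating both pre- and post-v1.80.0 error codes."""
    resp = requests.post(
        f"{base_url}/_matrix/client/v3/rooms/{room_id}/report/{event_id}",
        headers={"Authorization": f"Bearer {token}"},
        json={"reason": "spam", "score": -100},
    )
    if resp.status_code in (403, 404):
        # Synapse before v1.80.0 returned 403 when the reporter could not
        # view the event; v1.80.0 returns 404 instead (per MSC2249).
        print("Event not visible to this user; report rejected.")
        return
    resp.raise_for_status()
```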
```diff
@@ -245,6 +245,7 @@ information.
     # Registration/login requests
     ^/_matrix/client/(api/v1|r0|v3|unstable)/login$
     ^/_matrix/client/(r0|v3|unstable)/register$
+    ^/_matrix/client/(r0|v3|unstable)/register/available$
     ^/_matrix/client/v1/register/m.login.registration_token/validity$
 
     # Event sending requests
```
Generated (+50 -50)

```diff
@@ -1568,48 +1568,48 @@ files = [
 
 [[package]]
 name = "pydantic"
-version = "1.10.4"
+version = "1.10.6"
 description = "Data validation and settings management using python type hints"
 category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "pydantic-1.10.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5635de53e6686fe7a44b5cf25fcc419a0d5e5c1a1efe73d49d48fe7586db854"},
-    {file = "pydantic-1.10.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6dc1cc241440ed7ca9ab59d9929075445da6b7c94ced281b3dd4cfe6c8cff817"},
-    {file = "pydantic-1.10.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51bdeb10d2db0f288e71d49c9cefa609bca271720ecd0c58009bd7504a0c464c"},
-    {file = "pydantic-1.10.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:78cec42b95dbb500a1f7120bdf95c401f6abb616bbe8785ef09887306792e66e"},
-    {file = "pydantic-1.10.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8775d4ef5e7299a2f4699501077a0defdaac5b6c4321173bcb0f3c496fbadf85"},
-    {file = "pydantic-1.10.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:572066051eeac73d23f95ba9a71349c42a3e05999d0ee1572b7860235b850cc6"},
-    {file = "pydantic-1.10.4-cp310-cp310-win_amd64.whl", hash = "sha256:7feb6a2d401f4d6863050f58325b8d99c1e56f4512d98b11ac64ad1751dc647d"},
-    {file = "pydantic-1.10.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:39f4a73e5342b25c2959529f07f026ef58147249f9b7431e1ba8414a36761f53"},
-    {file = "pydantic-1.10.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:983e720704431a6573d626b00662eb78a07148c9115129f9b4351091ec95ecc3"},
-    {file = "pydantic-1.10.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75d52162fe6b2b55964fbb0af2ee58e99791a3138588c482572bb6087953113a"},
-    {file = "pydantic-1.10.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fdf8d759ef326962b4678d89e275ffc55b7ce59d917d9f72233762061fd04a2d"},
-    {file = "pydantic-1.10.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:05a81b006be15655b2a1bae5faa4280cf7c81d0e09fcb49b342ebf826abe5a72"},
-    {file = "pydantic-1.10.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d88c4c0e5c5dfd05092a4b271282ef0588e5f4aaf345778056fc5259ba098857"},
-    {file = "pydantic-1.10.4-cp311-cp311-win_amd64.whl", hash = "sha256:6a05a9db1ef5be0fe63e988f9617ca2551013f55000289c671f71ec16f4985e3"},
-    {file = "pydantic-1.10.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:887ca463c3bc47103c123bc06919c86720e80e1214aab79e9b779cda0ff92a00"},
-    {file = "pydantic-1.10.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fdf88ab63c3ee282c76d652fc86518aacb737ff35796023fae56a65ced1a5978"},
-    {file = "pydantic-1.10.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a48f1953c4a1d9bd0b5167ac50da9a79f6072c63c4cef4cf2a3736994903583e"},
-    {file = "pydantic-1.10.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a9f2de23bec87ff306aef658384b02aa7c32389766af3c5dee9ce33e80222dfa"},
-    {file = "pydantic-1.10.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:cd8702c5142afda03dc2b1ee6bc358b62b3735b2cce53fc77b31ca9f728e4bc8"},
-    {file = "pydantic-1.10.4-cp37-cp37m-win_amd64.whl", hash = "sha256:6e7124d6855b2780611d9f5e1e145e86667eaa3bd9459192c8dc1a097f5e9903"},
-    {file = "pydantic-1.10.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b53e1d41e97063d51a02821b80538053ee4608b9a181c1005441f1673c55423"},
-    {file = "pydantic-1.10.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:55b1625899acd33229c4352ce0ae54038529b412bd51c4915349b49ca575258f"},
-    {file = "pydantic-1.10.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:301d626a59edbe5dfb48fcae245896379a450d04baeed50ef40d8199f2733b06"},
-    {file = "pydantic-1.10.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b6f9d649892a6f54a39ed56b8dfd5e08b5f3be5f893da430bed76975f3735d15"},
-    {file = "pydantic-1.10.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:d7b5a3821225f5c43496c324b0d6875fde910a1c2933d726a743ce328fbb2a8c"},
-    {file = "pydantic-1.10.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f2f7eb6273dd12472d7f218e1fef6f7c7c2f00ac2e1ecde4db8824c457300416"},
-    {file = "pydantic-1.10.4-cp38-cp38-win_amd64.whl", hash = "sha256:4b05697738e7d2040696b0a66d9f0a10bec0efa1883ca75ee9e55baf511909d6"},
-    {file = "pydantic-1.10.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a9a6747cac06c2beb466064dda999a13176b23535e4c496c9d48e6406f92d42d"},
-    {file = "pydantic-1.10.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eb992a1ef739cc7b543576337bebfc62c0e6567434e522e97291b251a41dad7f"},
-    {file = "pydantic-1.10.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:990406d226dea0e8f25f643b370224771878142155b879784ce89f633541a024"},
-    {file = "pydantic-1.10.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e82a6d37a95e0b1b42b82ab340ada3963aea1317fd7f888bb6b9dfbf4fff57c"},
-    {file = "pydantic-1.10.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9193d4f4ee8feca58bc56c8306bcb820f5c7905fd919e0750acdeeeef0615b28"},
-    {file = "pydantic-1.10.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2b3ce5f16deb45c472dde1a0ee05619298c864a20cded09c4edd820e1454129f"},
-    {file = "pydantic-1.10.4-cp39-cp39-win_amd64.whl", hash = "sha256:9cbdc268a62d9a98c56e2452d6c41c0263d64a2009aac69246486f01b4f594c4"},
-    {file = "pydantic-1.10.4-py3-none-any.whl", hash = "sha256:4948f264678c703f3877d1c8877c4e3b2e12e549c57795107f08cf70c6ec7774"},
-    {file = "pydantic-1.10.4.tar.gz", hash = "sha256:b9a3859f24eb4e097502a3be1fb4b2abb79b6103dd9e2e0edb70613a4459a648"},
+    {file = "pydantic-1.10.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f9289065611c48147c1dd1fd344e9d57ab45f1d99b0fb26c51f1cf72cd9bcd31"},
+    {file = "pydantic-1.10.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8c32b6bba301490d9bb2bf5f631907803135e8085b6aa3e5fe5a770d46dd0160"},
+    {file = "pydantic-1.10.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd9b9e98068fa1068edfc9eabde70a7132017bdd4f362f8b4fd0abed79c33083"},
+    {file = "pydantic-1.10.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c84583b9df62522829cbc46e2b22e0ec11445625b5acd70c5681ce09c9b11c4"},
+    {file = "pydantic-1.10.6-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:b41822064585fea56d0116aa431fbd5137ce69dfe837b599e310034171996084"},
+    {file = "pydantic-1.10.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:61f1f08adfaa9cc02e0cbc94f478140385cbd52d5b3c5a657c2fceb15de8d1fb"},
+    {file = "pydantic-1.10.6-cp310-cp310-win_amd64.whl", hash = "sha256:32937835e525d92c98a1512218db4eed9ddc8f4ee2a78382d77f54341972c0e7"},
+    {file = "pydantic-1.10.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bbd5c531b22928e63d0cb1868dee76123456e1de2f1cb45879e9e7a3f3f1779b"},
+    {file = "pydantic-1.10.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e277bd18339177daa62a294256869bbe84df1fb592be2716ec62627bb8d7c81d"},
+    {file = "pydantic-1.10.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89f15277d720aa57e173954d237628a8d304896364b9de745dcb722f584812c7"},
+    {file = "pydantic-1.10.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b243b564cea2576725e77aeeda54e3e0229a168bc587d536cd69941e6797543d"},
+    {file = "pydantic-1.10.6-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3ce13a558b484c9ae48a6a7c184b1ba0e5588c5525482681db418268e5f86186"},
+    {file = "pydantic-1.10.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3ac1cd4deed871dfe0c5f63721e29debf03e2deefa41b3ed5eb5f5df287c7b70"},
+    {file = "pydantic-1.10.6-cp311-cp311-win_amd64.whl", hash = "sha256:b1eb6610330a1dfba9ce142ada792f26bbef1255b75f538196a39e9e90388bf4"},
+    {file = "pydantic-1.10.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4ca83739c1263a044ec8b79df4eefc34bbac87191f0a513d00dd47d46e307a65"},
+    {file = "pydantic-1.10.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea4e2a7cb409951988e79a469f609bba998a576e6d7b9791ae5d1e0619e1c0f2"},
+    {file = "pydantic-1.10.6-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:53de12b4608290992a943801d7756f18a37b7aee284b9ffa794ee8ea8153f8e2"},
+    {file = "pydantic-1.10.6-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:60184e80aac3b56933c71c48d6181e630b0fbc61ae455a63322a66a23c14731a"},
+    {file = "pydantic-1.10.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:415a3f719ce518e95a92effc7ee30118a25c3d032455d13e121e3840985f2efd"},
+    {file = "pydantic-1.10.6-cp37-cp37m-win_amd64.whl", hash = "sha256:72cb30894a34d3a7ab6d959b45a70abac8a2a93b6480fc5a7bfbd9c935bdc4fb"},
+    {file = "pydantic-1.10.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3091d2eaeda25391405e36c2fc2ed102b48bac4b384d42b2267310abae350ca6"},
+    {file = "pydantic-1.10.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:751f008cd2afe812a781fd6aa2fb66c620ca2e1a13b6a2152b1ad51553cb4b77"},
+    {file = "pydantic-1.10.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:12e837fd320dd30bd625be1b101e3b62edc096a49835392dcf418f1a5ac2b832"},
+    {file = "pydantic-1.10.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:587d92831d0115874d766b1f5fddcdde0c5b6c60f8c6111a394078ec227fca6d"},
+    {file = "pydantic-1.10.6-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:476f6674303ae7965730a382a8e8d7fae18b8004b7b69a56c3d8fa93968aa21c"},
+    {file = "pydantic-1.10.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:3a2be0a0f32c83265fd71a45027201e1278beaa82ea88ea5b345eea6afa9ac7f"},
+    {file = "pydantic-1.10.6-cp38-cp38-win_amd64.whl", hash = "sha256:0abd9c60eee6201b853b6c4be104edfba4f8f6c5f3623f8e1dba90634d63eb35"},
+    {file = "pydantic-1.10.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6195ca908045054dd2d57eb9c39a5fe86409968b8040de8c2240186da0769da7"},
+    {file = "pydantic-1.10.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:43cdeca8d30de9a897440e3fb8866f827c4c31f6c73838e3a01a14b03b067b1d"},
+    {file = "pydantic-1.10.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c19eb5163167489cb1e0161ae9220dadd4fc609a42649e7e84a8fa8fff7a80f"},
+    {file = "pydantic-1.10.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:012c99a9c0d18cfde7469aa1ebff922e24b0c706d03ead96940f5465f2c9cf62"},
+    {file = "pydantic-1.10.6-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:528dcf7ec49fb5a84bf6fe346c1cc3c55b0e7603c2123881996ca3ad79db5bfc"},
+    {file = "pydantic-1.10.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:163e79386c3547c49366e959d01e37fc30252285a70619ffc1b10ede4758250a"},
+    {file = "pydantic-1.10.6-cp39-cp39-win_amd64.whl", hash = "sha256:189318051c3d57821f7233ecc94708767dd67687a614a4e8f92b4a020d4ffd06"},
+    {file = "pydantic-1.10.6-py3-none-any.whl", hash = "sha256:acc6783751ac9c9bc4680379edd6d286468a1dc8d7d9906cd6f1186ed682b2b0"},
+    {file = "pydantic-1.10.6.tar.gz", hash = "sha256:cf95adb0d1671fc38d8c43dd921ad5814a735e7d9b4d9e437c088002863854fd"},
 ]
 
 [package.dependencies]
@@ -1621,25 +1621,22 @@ email = ["email-validator (>=1.0.3)"]
 
 [[package]]
 name = "pygithub"
-version = "1.57"
+version = "1.58.1"
 description = "Use the full Github API v3"
 category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "PyGithub-1.57-py3-none-any.whl", hash = "sha256:5822febeac2391f1306c55a99af2bc8f86c8bf82ded000030cd02c18f31b731f"},
-    {file = "PyGithub-1.57.tar.gz", hash = "sha256:c273f252b278fb81f1769505cc6921bdb6791e1cebd6ac850cc97dad13c31ff3"},
+    {file = "PyGithub-1.58.1-py3-none-any.whl", hash = "sha256:4e7fe9c3ec30d5fde5b4fbb97f18821c9dbf372bf6df337fe66f6689a65e0a83"},
+    {file = "PyGithub-1.58.1.tar.gz", hash = "sha256:7d528b4ad92bc13122129fafd444ce3d04c47d2d801f6446b6e6ee2d410235b3"},
 ]
 
 [package.dependencies]
 deprecated = "*"
-pyjwt = ">=2.4.0"
+pyjwt = {version = ">=2.4.0", extras = ["crypto"]}
 pynacl = ">=1.4.0"
 requests = ">=2.14.0"
 
-[package.extras]
-integrations = ["cryptography"]
-
 [[package]]
 name = "pygments"
 version = "2.11.2"
@@ -1675,6 +1672,9 @@ files = [
     {file = "PyJWT-2.4.0.tar.gz", hash = "sha256:d42908208c699b3b973cbeb01a969ba6a96c821eefb1c5bfe4c390c01d67abba"},
 ]
 
+[package.dependencies]
+cryptography = {version = ">=3.3.1", optional = true, markers = "extra == \"crypto\""}
+
 [package.extras]
 crypto = ["cryptography (>=3.3.1)"]
 dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.3.1)", "mypy", "pre-commit", "pytest (>=6.0.0,<7.0.0)", "sphinx", "sphinx-rtd-theme", "zope.interface"]
@@ -2523,14 +2523,14 @@ files = [
 
 [[package]]
 name = "txredisapi"
-version = "1.4.7"
+version = "1.4.9"
 description = "non-blocking redis client for python"
 category = "main"
 optional = true
 python-versions = "*"
 files = [
-    {file = "txredisapi-1.4.7-py3-none-any.whl", hash = "sha256:34c9eba8d34f452d30661f073b67b8cd42b695e3d31678ec1bbf628a65a0f059"},
-    {file = "txredisapi-1.4.7.tar.gz", hash = "sha256:e6cc43f51e35d608abdca8f8c7d20e148fe1d82679f6e584baea613ebec812bb"},
+    {file = "txredisapi-1.4.9-py3-none-any.whl", hash = "sha256:72e6ad09cc5fffe3bec2e55e5bfb74407bd357565fc212e6003f7e26ef7d8f78"},
+    {file = "txredisapi-1.4.9.tar.gz", hash = "sha256:c9607062d05e4d0b8ef84719eb76a3fe7d5ccd606a2acf024429da51d6e84559"},
 ]
 
 [package.dependencies]
@@ -2684,14 +2684,14 @@ files = [
 
 [[package]]
 name = "types-requests"
-version = "2.28.11.12"
+version = "2.28.11.15"
 description = "Typing stubs for requests"
 category = "dev"
 optional = false
 python-versions = "*"
 files = [
-    {file = "types-requests-2.28.11.12.tar.gz", hash = "sha256:fd530aab3fc4f05ee36406af168f0836e6f00f1ee51a0b96b7311f82cb675230"},
-    {file = "types_requests-2.28.11.12-py3-none-any.whl", hash = "sha256:dbc2933635860e553ffc59f5e264264981358baffe6342b925e3eb8261f866ee"},
+    {file = "types-requests-2.28.11.15.tar.gz", hash = "sha256:fc8eaa09cc014699c6b63c60c2e3add0c8b09a410c818b5ac6e65f92a26dde09"},
+    {file = "types_requests-2.28.11.15-py3-none-any.whl", hash = "sha256:a05e4c7bc967518fba5789c341ea8b0c942776ee474c7873129a61161978e586"},
 ]
 
 [package.dependencies]
```
```diff
@@ -280,7 +280,7 @@ def _prepare() -> None:
     )
 
     print("Opening the changelog in your browser...")
-    print("Please ask others to give it a check.")
+    print("Please ask #synapse-dev to give it a check.")
     click.launch(
         f"https://github.com/matrix-org/synapse/blob/{synapse_repo.active_branch.name}/CHANGES.md"
     )
```
```diff
@@ -1329,7 +1329,7 @@ def main() -> None:
     sqlite_config = {
         "name": "sqlite3",
         "args": {
-            "database": "file:{}?mode=rw".format(args.sqlite_database),
+            "database": args.sqlite_database,
             "cp_min": 1,
             "cp_max": 1,
             "check_same_thread": False,
```
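One plausible reading of this fix, sketched here for illustration (the filenames are placeholders, and this is not code from the changeset): `sqlite3.connect` only parses `file:` URI filenames when `uri=True` is passed, so a URI-style `database` value handed to a driver that connects with default arguments is treated as a literal, usually nonexistent, filename.

```python
import sqlite3

# A plain path opens (or creates) the database file directly; this is the
# form the port script passes again after this change.
conn = sqlite3.connect("homeserver.db")

# URI filenames require uri=True; without it, the string below would be
# treated as a literal filename rather than being parsed as a URI.
conn_uri = sqlite3.connect("file:homeserver.db?mode=rw", uri=True)
```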
```diff
@@ -108,6 +108,11 @@ class Codes(str, Enum):
 
     USER_AWAITING_APPROVAL = "ORG.MATRIX.MSC3866_USER_AWAITING_APPROVAL"
 
+    AS_PING_URL_NOT_SET = "FI.MAU.MSC2659_URL_NOT_SET"
+    AS_PING_BAD_STATUS = "FI.MAU.MSC2659_BAD_STATUS"
+    AS_PING_CONNECTION_TIMEOUT = "FI.MAU.MSC2659_CONNECTION_TIMEOUT"
+    AS_PING_CONNECTION_FAILED = "FI.MAU.MSC2659_CONNECTION_FAILED"
+
     # Attempt to send a second annotation with the same event type & annotation key
     # MSC2677
     DUPLICATE_ANNOTATION = "M_DUPLICATE_ANNOTATION"
```
```diff
@@ -266,6 +266,19 @@ class ApplicationServiceApi(SimpleHttpClient):
         key = (service.id, protocol)
         return await self.protocol_meta_cache.wrap(key, _get)
 
+    async def ping(self, service: "ApplicationService", txn_id: Optional[str]) -> None:
+        # The caller should check that url is set
+        assert service.url is not None, "ping called without URL being set"
+
+        # This is required by the configuration.
+        assert service.hs_token is not None
+
+        await self.post_json_get_json(
+            uri=service.url + "/_matrix/app/unstable/fi.mau.msc2659/ping",
+            post_json={"transaction_id": txn_id},
+            headers={"Authorization": [f"Bearer {service.hs_token}"]},
+        )
+
     async def push_bulk(
         self,
         service: "ApplicationService",
```
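For orientation, a minimal sketch of the other end of this call, not taken from this changeset: per MSC2659 the homeserver POSTs `{"transaction_id": ...}` to the application service with the `hs_token` as a bearer token, and the appservice replies `200` with an empty JSON object. The aiohttp framing, token value, and port are assumptions.

```python
from aiohttp import web

EXPECTED_HS_TOKEN = "hs_token-from-the-registration-file"  # placeholder

async def handle_ping(request: web.Request) -> web.Response:
    # The homeserver authenticates with the hs_token from the registration.
    auth = request.headers.get("Authorization", "")
    if auth != f"Bearer {EXPECTED_HS_TOKEN}":
        return web.json_response({"errcode": "M_FORBIDDEN"}, status=403)

    body = await request.json()
    _txn_id = body.get("transaction_id")  # ties the ping to a client request

    # Any 2xx response tells the homeserver the appservice is reachable.
    return web.json_response({})

app = web.Application()
app.router.add_post("/_matrix/app/unstable/fi.mau.msc2659/ping", handle_ping)

if __name__ == "__main__":
    web.run_app(app, port=8009)
```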
```diff
@@ -178,3 +178,6 @@ class ExperimentalConfig(Config):
 
         # MSC3967: Do not require UIA when first uploading cross signing keys
         self.msc3967_enabled = experimental.get("msc3967_enabled", False)
+
+        # MSC2659: Application service ping endpoint
+        self.msc2659_enabled = experimental.get("msc2659_enabled", False)
```
```diff
@@ -24,6 +24,7 @@ from typing import (
     Generic,
     Iterable,
     List,
+    NoReturn,
     Optional,
     Sequence,
     Tuple,
@@ -338,6 +339,9 @@ class EventBase(metaclass=abc.ABCMeta):
     type: DictProperty[str] = DictProperty("type")
     user_id: DictProperty[str] = DictProperty("sender")
 
+    def __hash__(self) -> NoReturn:
+        raise NotImplementedError()
+
     @property
     def event_id(self) -> str:
         raise NotImplementedError()
```
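A short sketch of the consequence of making `EventBase` unhashable (written for this comparison, not taken from the changeset): events can no longer be dict keys or set members, so downstream code should key on the immutable `event_id` string.

```python
from typing import Dict

# Count sightings per event, keyed on the event ID rather than the event.
seen_counts: Dict[str, int] = {}

def record_event(event) -> None:
    # Safe: event.event_id is a plain str and stays hashable.
    seen_counts[event.event_id] = seen_counts.get(event.event_id, 0) + 1

# After this change, `{event: 1}` or `{event}` raises NotImplementedError
# from EventBase.__hash__ instead of silently hashing a mutable object.
```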
```diff
@@ -159,15 +159,16 @@ class EventHandler:
         Returns:
             An event, or None if there is no event matching this ID.
         Raises:
-            SynapseError if there was a problem retrieving this event, or
-            AuthError if the user does not have the rights to inspect this
-            event.
+            AuthError: if the user does not have the rights to inspect this event.
         """
         redact_behaviour = (
             EventRedactBehaviour.as_is if show_redacted else EventRedactBehaviour.redact
         )
         event = await self.store.get_event(
-            event_id, check_room_id=room_id, redact_behaviour=redact_behaviour
+            event_id,
+            check_room_id=room_id,
+            redact_behaviour=redact_behaviour,
+            allow_none=True,
         )
 
         if not event:
```
```diff
@@ -987,10 +987,11 @@ class EventCreationHandler:
             # a situation where event persistence can't keep up, causing
             # extremities to pile up, which in turn leads to state resolution
             # taking longer.
-        async with self.limiter.queue(event_dict["room_id"]):
+        room_id = event_dict["room_id"]
+        async with self.limiter.queue(room_id):
             if txn_id:
                 event = await self.get_event_from_transaction(
-                    requester, txn_id, event_dict["room_id"]
+                    requester, txn_id, room_id
                 )
                 if event:
                     # we know it was persisted, so must have a stream ordering
@@ -1000,6 +1001,18 @@
                         event.internal_metadata.stream_ordering,
                     )
 
+            # If we don't have any prev event IDs specified then we need to
+            # check that the host is in the room (as otherwise populating the
+            # prev events will fail), at which point we may as well check the
+            # local user is in the room.
+            if not prev_event_ids:
+                user_id = requester.user.to_string()
+                is_user_in_room = await self.store.check_local_user_in_room(
+                    user_id, room_id
+                )
+                if not is_user_in_room:
+                    raise AuthError(403, f"User {user_id} not in room {room_id}")
+
             # Try several times, it could fail with PartialStateConflictError
             # in handle_new_client_event, cf comment in except block.
             max_retries = 5
```
```diff
@@ -892,6 +892,10 @@ def set_cors_headers(request: SynapseRequest) -> None:
         b"Access-Control-Allow-Headers",
         b"X-Requested-With, Content-Type, Authorization, Date",
     )
+    request.setHeader(
+        b"Access-Control-Expose-Headers",
+        b"Synapse-Trace-Id",
+    )
 
 
 def set_corp_headers(request: Request) -> None:
```
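A hedged usage example (the homeserver URL is a placeholder): listing `Synapse-Trace-Id` in `Access-Control-Expose-Headers` lets browser-based clients read it from CORS responses; any HTTP client can surface it for debugging.

```python
import requests

resp = requests.get("https://matrix.example.com/_matrix/client/versions")

# The trace ID correlates a client-observed response with server-side logs.
trace_id = resp.headers.get("Synapse-Trace-Id")
if trace_id:
    print(f"Synapse-Trace-Id: {trace_id}")
```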
@@ -0,0 +1,833 @@

```python
# Copyright 2016 OpenMarket Ltd
# Copyright 2020-2023 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import errno
import fnmatch
import logging
import os
import re
import shutil
import sys
import traceback
from typing import TYPE_CHECKING, BinaryIO, Iterable, Optional, Tuple
from urllib.parse import urljoin, urlparse, urlsplit
from urllib.request import urlopen

import attr

from twisted.internet.defer import Deferred
from twisted.internet.error import DNSLookupError

from synapse.api.errors import Codes, SynapseError
from synapse.http.client import SimpleHttpClient
from synapse.logging.context import make_deferred_yieldable, run_in_background
from synapse.media._base import FileInfo, get_filename_from_headers
from synapse.media.media_storage import MediaStorage
from synapse.media.oembed import OEmbedProvider
from synapse.media.preview_html import decode_body, parse_html_to_open_graph
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.types import JsonDict, UserID
from synapse.util import json_encoder
from synapse.util.async_helpers import ObservableDeferred
from synapse.util.caches.expiringcache import ExpiringCache
from synapse.util.stringutils import random_string

if TYPE_CHECKING:
    from synapse.media.media_repository import MediaRepository
    from synapse.server import HomeServer

logger = logging.getLogger(__name__)

OG_TAG_NAME_MAXLEN = 50
OG_TAG_VALUE_MAXLEN = 1000

ONE_HOUR = 60 * 60 * 1000
ONE_DAY = 24 * ONE_HOUR
IMAGE_CACHE_EXPIRY_MS = 2 * ONE_DAY


@attr.s(slots=True, frozen=True, auto_attribs=True)
class DownloadResult:
    length: int
    uri: str
    response_code: int
    media_type: str
    download_name: Optional[str]
    expires: int
    etag: Optional[str]


@attr.s(slots=True, frozen=True, auto_attribs=True)
class MediaInfo:
    """
    Information parsed from downloading media being previewed.
    """

    # The Content-Type header of the response.
    media_type: str
    # The length (in bytes) of the downloaded media.
    media_length: int
    # The media filename, according to the server. This is parsed from the
    # returned headers, if possible.
    download_name: Optional[str]
    # The time of the preview.
    created_ts_ms: int
    # Information from the media storage provider about where the file is stored
    # on disk.
    filesystem_id: str
    filename: str
    # The URI being previewed.
    uri: str
    # The HTTP response code.
    response_code: int
    # The timestamp (in milliseconds) of when this preview expires.
    expires: int
    # The ETag header of the response.
    etag: Optional[str]


class UrlPreviewer:
    """
    Generates an Open Graph (https://ogp.me/) responses (with some Matrix
    specific additions) for a given URL.

    When Synapse is asked to preview a URL it does the following:

    1. Checks against a URL blacklist (defined as `url_preview_url_blacklist` in the
       config).
    2. Checks the URL against an in-memory cache and returns the result if it exists. (This
       is also used to de-duplicate processing of multiple in-flight requests at once.)
    3. Kicks off a background process to generate a preview:
       1. Checks URL and timestamp against the database cache and returns the result if it
          has not expired and was successful (a 2xx return code).
       2. Checks if the URL matches an oEmbed (https://oembed.com/) pattern. If it
          does, update the URL to download.
       3. Downloads the URL and stores it into a file via the media storage provider
          and saves the local media metadata.
       4. If the media is an image:
          1. Generates thumbnails.
          2. Generates an Open Graph response based on image properties.
       5. If the media is HTML:
          1. Decodes the HTML via the stored file.
          2. Generates an Open Graph response from the HTML.
          3. If a JSON oEmbed URL was found in the HTML via autodiscovery:
             1. Downloads the URL and stores it into a file via the media storage provider
                and saves the local media metadata.
             2. Convert the oEmbed response to an Open Graph response.
             3. Override any Open Graph data from the HTML with data from oEmbed.
          4. If an image exists in the Open Graph response:
             1. Downloads the URL and stores it into a file via the media storage
                provider and saves the local media metadata.
             2. Generates thumbnails.
             3. Updates the Open Graph response based on image properties.
       6. If the media is JSON and an oEmbed URL was found:
          1. Convert the oEmbed response to an Open Graph response.
          2. If a thumbnail or image is in the oEmbed response:
             1. Downloads the URL and stores it into a file via the media storage
                provider and saves the local media metadata.
             2. Generates thumbnails.
             3. Updates the Open Graph response based on image properties.
       7. Stores the result in the database cache.
    4. Returns the result.

    If any additional requests (e.g. from oEmbed autodiscovery, step 5.3 or
    image thumbnailing, step 5.4 or 6.4) fails then the URL preview as a whole
    does not fail. As much information as possible is returned.

    The in-memory cache expires after 1 hour.

    Expired entries in the database cache (and their associated media files) are
    deleted every 10 seconds. The default expiration time is 1 hour from download.
    """

    def __init__(
        self,
        hs: "HomeServer",
        media_repo: "MediaRepository",
        media_storage: MediaStorage,
    ):
        self.clock = hs.get_clock()
        self.filepaths = media_repo.filepaths
        self.max_spider_size = hs.config.media.max_spider_size
        self.server_name = hs.hostname
        self.store = hs.get_datastores().main
        self.client = SimpleHttpClient(
            hs,
            treq_args={"browser_like_redirects": True},
            ip_whitelist=hs.config.media.url_preview_ip_range_whitelist,
            ip_blacklist=hs.config.media.url_preview_ip_range_blacklist,
            use_proxy=True,
        )
        self.media_repo = media_repo
        self.primary_base_path = media_repo.primary_base_path
        self.media_storage = media_storage

        self._oembed = OEmbedProvider(hs)

        # We run the background jobs if we're the instance specified (or no
        # instance is specified, where we assume there is only one instance
        # serving media).
        instance_running_jobs = hs.config.media.media_instance_running_background_jobs
        self._worker_run_media_background_jobs = (
            instance_running_jobs is None
            or instance_running_jobs == hs.get_instance_name()
        )

        self.url_preview_url_blacklist = hs.config.media.url_preview_url_blacklist
        self.url_preview_accept_language = hs.config.media.url_preview_accept_language

        # memory cache mapping urls to an ObservableDeferred returning
        # JSON-encoded OG metadata
        self._cache: ExpiringCache[str, ObservableDeferred] = ExpiringCache(
            cache_name="url_previews",
            clock=self.clock,
            # don't spider URLs more often than once an hour
            expiry_ms=ONE_HOUR,
        )

        if self._worker_run_media_background_jobs:
            self._cleaner_loop = self.clock.looping_call(
                self._start_expire_url_cache_data, 10 * 1000
            )

    async def preview(self, url: str, user: UserID, ts: int) -> bytes:
        # XXX: we could move this into _do_preview if we wanted.
        url_tuple = urlsplit(url)
        for entry in self.url_preview_url_blacklist:
            match = True
            for attrib in entry:
                pattern = entry[attrib]
                value = getattr(url_tuple, attrib)
                logger.debug(
                    "Matching attrib '%s' with value '%s' against pattern '%s'",
                    attrib,
                    value,
                    pattern,
                )

                if value is None:
                    match = False
                    continue

                # Some attributes might not be parsed as strings by urlsplit (such as the
                # port, which is parsed as an int). Because we use match functions that
                # expect strings, we want to make sure that's what we give them.
                value_str = str(value)

                if pattern.startswith("^"):
                    if not re.match(pattern, value_str):
                        match = False
                        continue
                else:
                    if not fnmatch.fnmatch(value_str, pattern):
                        match = False
                        continue
            if match:
                logger.warning("URL %s blocked by url_blacklist entry %s", url, entry)
                raise SynapseError(
                    403, "URL blocked by url pattern blacklist entry", Codes.UNKNOWN
                )

        # the in-memory cache:
        # * ensures that only one request is active at a time
        # * takes load off the DB for the thundering herds
        # * also caches any failures (unlike the DB) so we don't keep
        #   requesting the same endpoint

        observable = self._cache.get(url)

        if not observable:
            download = run_in_background(self._do_preview, url, user, ts)
            observable = ObservableDeferred(download, consumeErrors=True)
            self._cache[url] = observable
        else:
            logger.info("Returning cached response")

        return await make_deferred_yieldable(observable.observe())

    async def _do_preview(self, url: str, user: UserID, ts: int) -> bytes:
        """Check the db, and download the URL and build a preview

        Args:
            url: The URL to preview.
            user: The user requesting the preview.
            ts: The timestamp requested for the preview.

        Returns:
            json-encoded og data
        """
        # check the URL cache in the DB (which will also provide us with
        # historical previews, if we have any)
        cache_result = await self.store.get_url_cache(url, ts)
        if (
            cache_result
            and cache_result["expires_ts"] > ts
            and cache_result["response_code"] / 100 == 2
        ):
            # It may be stored as text in the database, not as bytes (such as
            # PostgreSQL). If so, encode it back before handing it on.
            og = cache_result["og"]
            if isinstance(og, str):
                og = og.encode("utf8")
            return og

        # If this URL can be accessed via oEmbed, use that instead.
        url_to_download = url
        oembed_url = self._oembed.get_oembed_url(url)
        if oembed_url:
            url_to_download = oembed_url

        media_info = await self._handle_url(url_to_download, user)

        logger.debug("got media_info of '%s'", media_info)

        # The number of milliseconds that the response should be considered valid.
        expiration_ms = media_info.expires
        author_name: Optional[str] = None

        if _is_media(media_info.media_type):
            file_id = media_info.filesystem_id
            dims = await self.media_repo._generate_thumbnails(
                None, file_id, file_id, media_info.media_type, url_cache=True
            )

            og = {
                "og:description": media_info.download_name,
                "og:image": f"mxc://{self.server_name}/{media_info.filesystem_id}",
                "og:image:type": media_info.media_type,
                "matrix:image:size": media_info.media_length,
            }

            if dims:
                og["og:image:width"] = dims["width"]
                og["og:image:height"] = dims["height"]
            else:
                logger.warning("Couldn't get dims for %s" % url)

            # define our OG response for this media
        elif _is_html(media_info.media_type):
            # TODO: somehow stop a big HTML tree from exploding synapse's RAM

            with open(media_info.filename, "rb") as file:
                body = file.read()

            tree = decode_body(body, media_info.uri, media_info.media_type)
            if tree is not None:
                # Check if this HTML document points to oEmbed information and
                # defer to that.
                oembed_url = self._oembed.autodiscover_from_html(tree)
                og_from_oembed: JsonDict = {}
                if oembed_url:
                    try:
                        oembed_info = await self._handle_url(
                            oembed_url, user, allow_data_urls=True
                        )
                    except Exception as e:
                        # Fetching the oEmbed info failed, don't block the entire URL preview.
                        logger.warning(
                            "oEmbed fetch failed during URL preview: %s errored with %s",
                            oembed_url,
                            e,
                        )
                    else:
                        (
                            og_from_oembed,
                            author_name,
                            expiration_ms,
                        ) = await self._handle_oembed_response(
                            url, oembed_info, expiration_ms
                        )

                # Parse Open Graph information from the HTML in case the oEmbed
                # response failed or is incomplete.
                og_from_html = parse_html_to_open_graph(tree)

                # Compile the Open Graph response by using the scraped
                # information from the HTML and overlaying any information
                # from the oEmbed response.
                og = {**og_from_html, **og_from_oembed}

                await self._precache_image_url(user, media_info, og)
            else:
                og = {}

        elif oembed_url:
            # Handle the oEmbed information.
            og, author_name, expiration_ms = await self._handle_oembed_response(
                url, media_info, expiration_ms
            )
            await self._precache_image_url(user, media_info, og)

        else:
            logger.warning("Failed to find any OG data in %s", url)
            og = {}

        # If we don't have a title but we have author_name, copy it as
        # title
        if not og.get("og:title") and author_name:
            og["og:title"] = author_name

        # filter out any stupidly long values
        keys_to_remove = []
        for k, v in og.items():
            # values can be numeric as well as strings, hence the cast to str
            if len(k) > OG_TAG_NAME_MAXLEN or len(str(v)) > OG_TAG_VALUE_MAXLEN:
                logger.warning(
                    "Pruning overlong tag %s from OG data", k[:OG_TAG_NAME_MAXLEN]
                )
                keys_to_remove.append(k)
        for k in keys_to_remove:
            del og[k]

        logger.debug("Calculated OG for %s as %s", url, og)

        jsonog = json_encoder.encode(og)

        # Cap the amount of time to consider a response valid.
        expiration_ms = min(expiration_ms, ONE_DAY)

        # store OG in history-aware DB cache
        await self.store.store_url_cache(
            url,
            media_info.response_code,
            media_info.etag,
            media_info.created_ts_ms + expiration_ms,
            jsonog,
            media_info.filesystem_id,
            media_info.created_ts_ms,
        )

        return jsonog.encode("utf8")

    async def _download_url(self, url: str, output_stream: BinaryIO) -> DownloadResult:
        """
        Fetches a remote URL and parses the headers.

        Args:
            url: The URL to fetch.
            output_stream: The stream to write the content to.

        Returns:
            A tuple of:
                Media length, URL downloaded, the HTTP response code,
                the media type, the downloaded file name, the number of
                milliseconds the result is valid for, the etag header.
        """

        try:
            logger.debug("Trying to get preview for url '%s'", url)
            length, headers, uri, code = await self.client.get_file(
                url,
                output_stream=output_stream,
                max_size=self.max_spider_size,
                headers={
                    b"Accept-Language": self.url_preview_accept_language,
                    # Use a custom user agent for the preview because some sites will only return
                    # Open Graph metadata to crawler user agents. Omit the Synapse version
                    # string to avoid leaking information.
                    b"User-Agent": [
                        "Synapse (bot; +https://github.com/matrix-org/synapse)"
                    ],
                },
                is_allowed_content_type=_is_previewable,
            )
        except SynapseError:
            # Pass SynapseErrors through directly, so that the servlet
            # handler will return a SynapseError to the client instead of
            # blank data or a 500.
            raise
        except DNSLookupError:
            # DNS lookup returned no results
            # Note: This will also be the case if one of the resolved IP
            # addresses is blacklisted
            raise SynapseError(
                502,
                "DNS resolution failure during URL preview generation",
                Codes.UNKNOWN,
            )
        except Exception as e:
            # FIXME: pass through 404s and other error messages nicely
            logger.warning("Error downloading %s: %r", url, e)

            raise SynapseError(
                500,
                "Failed to download content: %s"
                % (traceback.format_exception_only(sys.exc_info()[0], e),),
                Codes.UNKNOWN,
            )

        if b"Content-Type" in headers:
            media_type = headers[b"Content-Type"][0].decode("ascii")
        else:
            media_type = "application/octet-stream"

        download_name = get_filename_from_headers(headers)

        # FIXME: we should calculate a proper expiration based on the
        # Cache-Control and Expire headers. But for now, assume 1 hour.
        expires = ONE_HOUR
        etag = headers[b"ETag"][0].decode("ascii") if b"ETag" in headers else None

        return DownloadResult(
            length, uri, code, media_type, download_name, expires, etag
        )

    async def _parse_data_url(
        self, url: str, output_stream: BinaryIO
    ) -> DownloadResult:
        """
        Parses a data: URL.

        Args:
            url: The URL to parse.
            output_stream: The stream to write the content to.

        Returns:
            A tuple of:
                Media length, URL downloaded, the HTTP response code,
                the media type, the downloaded file name, the number of
                milliseconds the result is valid for, the etag header.
        """

        try:
            logger.debug("Trying to parse data url '%s'", url)
            with urlopen(url) as url_info:
                # TODO Can this be more efficient.
                output_stream.write(url_info.read())
        except Exception as e:
            logger.warning("Error parsing data: URL %s: %r", url, e)

            raise SynapseError(
                500,
                "Failed to parse data URL: %s"
                % (traceback.format_exception_only(sys.exc_info()[0], e),),
                Codes.UNKNOWN,
            )

        return DownloadResult(
            # Read back the length that has been written.
            length=output_stream.tell(),
            uri=url,
            # If it was parsed, consider this a 200 OK.
            response_code=200,
            # urlopen shoves the media-type from the data URL into the content type
            # header object.
            media_type=url_info.headers.get_content_type(),
            # Some features are not supported by data: URLs.
            download_name=None,
            expires=ONE_HOUR,
            etag=None,
        )

    async def _handle_url(
        self, url: str, user: UserID, allow_data_urls: bool = False
    ) -> MediaInfo:
        """
        Fetches content from a URL and parses the result to generate a MediaInfo.

        It uses the media storage provider to persist the fetched content and
        stores the mapping into the database.

        Args:
            url: The URL to fetch.
            user: The user who ahs requested this URL.
            allow_data_urls: True if data URLs should be allowed.

        Returns:
            A MediaInfo object describing the fetched content.
        """

        # TODO: we should probably honour robots.txt... except in practice
        # we're most likely being explicitly triggered by a human rather than a
        # bot, so are we really a robot?

        file_id = datetime.date.today().isoformat() + "_" + random_string(16)

        file_info = FileInfo(server_name=None, file_id=file_id, url_cache=True)

        with self.media_storage.store_into_file(file_info) as (f, fname, finish):
            if url.startswith("data:"):
                if not allow_data_urls:
                    raise SynapseError(
                        500, "Previewing of data: URLs is forbidden", Codes.UNKNOWN
                    )

                download_result = await self._parse_data_url(url, f)
            else:
                download_result = await self._download_url(url, f)

            await finish()

        try:
            time_now_ms = self.clock.time_msec()

            await self.store.store_local_media(
                media_id=file_id,
                media_type=download_result.media_type,
                time_now_ms=time_now_ms,
                upload_name=download_result.download_name,
                media_length=download_result.length,
                user_id=user,
                url_cache=url,
            )

        except Exception as e:
            logger.error("Error handling downloaded %s: %r", url, e)
            # TODO: we really ought to delete the downloaded file in this
            # case, since we won't have recorded it in the db, and will
            # therefore not expire it.
            raise

        return MediaInfo(
            media_type=download_result.media_type,
            media_length=download_result.length,
            download_name=download_result.download_name,
            created_ts_ms=time_now_ms,
            filesystem_id=file_id,
            filename=fname,
            uri=download_result.uri,
            response_code=download_result.response_code,
            expires=download_result.expires,
            etag=download_result.etag,
        )

    async def _precache_image_url(
        self, user: UserID, media_info: MediaInfo, og: JsonDict
    ) -> None:
        """
        Pre-cache the image (if one exists) for posterity

        Args:
            user: The user requesting the preview.
            media_info: The media being previewed.
            og: The Open Graph dictionary. This is modified with image information.
        """
        # If there's no image or it is blank, there's nothing to do.
        if "og:image" not in og:
            return

        # Remove the raw image URL, this will be replaced with an MXC URL, if successful.
        image_url = og.pop("og:image")
        if not image_url:
            return

        # The image URL from the HTML might be relative to the previewed page,
        # convert it to an URL which can be requested directly.
        url_parts = urlparse(image_url)
        if url_parts.scheme != "data":
            image_url = urljoin(media_info.uri, image_url)

        # FIXME: it might be cleaner to use the same flow as the main /preview_url
        # request itself and benefit from the same caching etc. But for now we
        # just rely on the caching on the master request to speed things up.
        try:
            image_info = await self._handle_url(image_url, user, allow_data_urls=True)
        except Exception as e:
            # Pre-caching the image failed, don't block the entire URL preview.
            logger.warning(
                "Pre-caching image failed during URL preview: %s errored with %s",
                image_url,
                e,
            )
            return

        if _is_media(image_info.media_type):
            # TODO: make sure we don't choke on white-on-transparent images
            file_id = image_info.filesystem_id
            dims = await self.media_repo._generate_thumbnails(
                None, file_id, file_id, image_info.media_type, url_cache=True
            )
            if dims:
                og["og:image:width"] = dims["width"]
                og["og:image:height"] = dims["height"]
            else:
                logger.warning("Couldn't get dims for %s", image_url)

            og["og:image"] = f"mxc://{self.server_name}/{image_info.filesystem_id}"
            og["og:image:type"] = image_info.media_type
            og["matrix:image:size"] = image_info.media_length

    async def _handle_oembed_response(
        self, url: str, media_info: MediaInfo, expiration_ms: int
    ) -> Tuple[JsonDict, Optional[str], int]:
        """
        Parse the downloaded oEmbed info.

        Args:
            url: The URL which is being previewed (not the one which was
                requested).
            media_info: The media being previewed.
            expiration_ms: The length of time, in milliseconds, the media is valid for.

        Returns:
            A tuple of:
                The Open Graph dictionary, if the oEmbed info can be parsed.
                The author name if it could be retrieved from oEmbed.
                The (possibly updated) length of time, in milliseconds, the media is valid for.
        """
        # If JSON was not returned, there's nothing to do.
        if not _is_json(media_info.media_type):
            return {}, None, expiration_ms

        with open(media_info.filename, "rb") as file:
            body = file.read()

        oembed_response = self._oembed.parse_oembed_response(url, body)
        open_graph_result = oembed_response.open_graph_result

        # Use the cache age from the oEmbed result, if one was given.
        if open_graph_result and oembed_response.cache_age is not None:
            expiration_ms = oembed_response.cache_age

        return open_graph_result, oembed_response.author_name, expiration_ms

    def _start_expire_url_cache_data(self) -> Deferred:
        return run_as_background_process(
            "expire_url_cache_data", self._expire_url_cache_data
        )

    async def _expire_url_cache_data(self) -> None:
        """Clean up expired url cache content, media and thumbnails."""

        assert self._worker_run_media_background_jobs

        now = self.clock.time_msec()

        logger.debug("Running url preview cache expiry")

        def try_remove_parent_dirs(dirs: Iterable[str]) -> None:
            """Attempt to remove the given chain of parent directories

            Args:
                dirs: The list of directory paths to delete, with children appearing
                    before their parents.
            """
            for dir in dirs:
                try:
                    os.rmdir(dir)
                except FileNotFoundError:
                    # Already deleted, continue with deleting the rest
                    pass
                except OSError as e:
                    # Failed, skip deleting the rest of the parent dirs
                    if e.errno != errno.ENOTEMPTY:
                        logger.warning(
                            "Failed to remove media directory while clearing url preview cache: %r: %s",
                            dir,
                            e,
                        )
                    break

        # First we delete expired url cache entries
        media_ids = await self.store.get_expired_url_cache(now)

        removed_media = []
        for media_id in media_ids:
            fname = self.filepaths.url_cache_filepath(media_id)
            try:
                os.remove(fname)
            except FileNotFoundError:
                pass  # If the path doesn't exist, meh
            except OSError as e:
                logger.warning(
                    "Failed to remove media while clearing url preview cache: %r: %s",
                    media_id,
                    e,
                )
                continue

            removed_media.append(media_id)

            dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
            try_remove_parent_dirs(dirs)

        await self.store.delete_url_cache(removed_media)

        if removed_media:
            logger.debug(
                "Deleted %d entries from url preview cache", len(removed_media)
            )
        else:
            logger.debug("No entries removed from url preview cache")

        # Now we delete old images associated with the url cache.
        # These may be cached for a bit on the client (i.e., they
        # may have a room open with a preview url thing open).
        # So we wait a couple of days before deleting, just in case.
```
|
||||
expire_before = now - IMAGE_CACHE_EXPIRY_MS
|
||||
media_ids = await self.store.get_url_cache_media_before(expire_before)
|
||||
|
||||
removed_media = []
|
||||
for media_id in media_ids:
|
||||
fname = self.filepaths.url_cache_filepath(media_id)
|
||||
try:
|
||||
os.remove(fname)
|
||||
except FileNotFoundError:
|
||||
pass # If the path doesn't exist, meh
|
||||
except OSError as e:
|
||||
logger.warning(
|
||||
"Failed to remove media from url preview cache: %r: %s", media_id, e
|
||||
)
|
||||
continue
|
||||
|
||||
dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
|
||||
try_remove_parent_dirs(dirs)
|
||||
|
||||
thumbnail_dir = self.filepaths.url_cache_thumbnail_directory(media_id)
|
||||
try:
|
||||
shutil.rmtree(thumbnail_dir)
|
||||
except FileNotFoundError:
|
||||
pass # If the path doesn't exist, meh
|
||||
except OSError as e:
|
||||
logger.warning(
|
||||
"Failed to remove media from url preview cache: %r: %s", media_id, e
|
||||
)
|
||||
continue
|
||||
|
||||
removed_media.append(media_id)
|
||||
|
||||
dirs = self.filepaths.url_cache_thumbnail_dirs_to_delete(media_id)
|
||||
# Note that one of the directories to be deleted has already been
|
||||
# removed by the `rmtree` above.
|
||||
try_remove_parent_dirs(dirs)
|
||||
|
||||
await self.store.delete_url_cache_media(removed_media)
|
||||
|
||||
if removed_media:
|
||||
logger.debug("Deleted %d media from url preview cache", len(removed_media))
|
||||
else:
|
||||
logger.debug("No media removed from url preview cache")
|
||||
|
||||
|
||||
def _is_media(content_type: str) -> bool:
|
||||
return content_type.lower().startswith("image/")
|
||||
|
||||
|
||||
def _is_html(content_type: str) -> bool:
|
||||
content_type = content_type.lower()
|
||||
return content_type.startswith("text/html") or content_type.startswith(
|
||||
"application/xhtml"
|
||||
)
|
||||
|
||||
|
||||
def _is_json(content_type: str) -> bool:
|
||||
return content_type.lower().startswith("application/json")
|
||||
|
||||
|
||||
def _is_previewable(content_type: str) -> bool:
|
||||
"""Returns True for content types for which we will perform URL preview and False
|
||||
otherwise."""
|
||||
|
||||
return _is_html(content_type) or _is_media(content_type) or _is_json(content_type)
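
Taken together, these helpers gate previewing on the Content-Type of the downloaded response. A minimal usage sketch (the sample content types below are invented for illustration):

# Minimal sketch of the content-type gating implemented by the helpers above;
# the sample values are invented.
assert _is_previewable("text/html; charset=utf-8")  # HTML: parsed for Open Graph
assert _is_previewable("image/png")                 # media: thumbnailed directly
assert _is_previewable("application/json")          # JSON: may carry an oEmbed payload
assert not _is_previewable("application/pdf")       # anything else is refused up front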

@@ -625,23 +625,6 @@ class ReplicationCommandHandler:

        self._notifier.notify_remote_server_up(cmd.data)

        # We relay to all other connections to ensure every instance gets the
        # notification.
        #
        # When configured to use redis we'll always only have one connection and
        # so this is a no-op (all instances will have already received the same
        # REMOTE_SERVER_UP command).
        #
        # For direct TCP connections this will relay to all other connections
        # connected to us. When on master this will correctly fan out to all
        # other direct TCP clients and on workers there'll only be the one
        # connection to master.
        #
        # (The logic here should also be sound if we have a mix of Redis and
        # direct TCP connections so long as there is only one traffic route
        # between two instances, but that is not currently supported).
        self.send_command(cmd, ignore_conn=conn)

    def new_connection(self, connection: IReplicationConnection) -> None:
        """Called when we have a new connection."""
        self._connections.append(connection)
@@ -689,21 +672,14 @@ class ReplicationCommandHandler:
        """
        return bool(self._connections)

    def send_command(
        self, cmd: Command, ignore_conn: Optional[IReplicationConnection] = None
    ) -> None:
    def send_command(self, cmd: Command) -> None:
        """Send a command to all connected connections.

        Args:
            cmd
            ignore_conn: If set, don't send the command to the given connection.
                Used when relaying commands from one connection to all others.
        """
        if self._connections:
            for connection in self._connections:
                if connection == ignore_conn:
                    continue

                try:
                    connection.send_command(cmd)
                except Exception:
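
For illustration, the relay rule that the removed `ignore_conn` plumbing implemented reduces to a standalone toy: fan a command out to every connection except the one it arrived on. The classes below are invented for this sketch and are not Synapse's real replication types.

# Standalone toy of the removed relay rule, for illustration only.
from typing import List, Optional


class ToyConnection:
    def __init__(self) -> None:
        self.received: List[str] = []

    def send_command(self, cmd: str) -> None:
        self.received.append(cmd)


class ToyCommandHandler:
    def __init__(self) -> None:
        self.connections: List[ToyConnection] = []

    def send_command(
        self, cmd: str, ignore_conn: Optional[ToyConnection] = None
    ) -> None:
        # Fan out to everyone except the connection the command came from,
        # which is exactly what the removed `ignore_conn` parameter did.
        for connection in self.connections:
            if connection is ignore_conn:
                continue
            connection.send_command(cmd)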

@@ -20,6 +20,7 @@ from synapse.rest.client import (
    account,
    account_data,
    account_validity,
    appservice_ping,
    auth,
    capabilities,
    devices,
@@ -140,6 +141,7 @@ class ClientRestResource(JsonResource):
        if is_main_process:
            password_policy.register_servlets(hs, client_resource)
            knock.register_servlets(hs, client_resource)
            appservice_ping.register_servlets(hs, client_resource)

        # moving to /_synapse/admin
        if is_main_process:

@@ -0,0 +1,115 @@
# Copyright 2023 Tulir Asokan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import time
from http import HTTPStatus
from typing import TYPE_CHECKING, Any, Dict, Tuple

from synapse.api.errors import (
    CodeMessageException,
    Codes,
    HttpResponseException,
    SynapseError,
)
from synapse.http import RequestTimedOutError
from synapse.http.server import HttpServer
from synapse.http.servlet import RestServlet, parse_json_object_from_request
from synapse.http.site import SynapseRequest
from synapse.types import JsonDict

from ._base import client_patterns

if TYPE_CHECKING:
    from synapse.server import HomeServer

logger = logging.getLogger(__name__)


class AppservicePingRestServlet(RestServlet):
    PATTERNS = client_patterns(
        "/fi.mau.msc2659/appservice/(?P<appservice_id>[^/]*)/ping",
        unstable=True,
        releases=(),
    )

    def __init__(self, hs: "HomeServer"):
        super().__init__()
        self.as_api = hs.get_application_service_api()
        self.auth = hs.get_auth()

    async def on_POST(
        self, request: SynapseRequest, appservice_id: str
    ) -> Tuple[int, JsonDict]:
        requester = await self.auth.get_user_by_req(request)

        if not requester.app_service:
            raise SynapseError(
                HTTPStatus.FORBIDDEN,
                "Only application services can use the /appservice/ping endpoint",
                Codes.FORBIDDEN,
            )
        elif requester.app_service.id != appservice_id:
            raise SynapseError(
                HTTPStatus.FORBIDDEN,
                "Mismatching application service ID in path",
                Codes.FORBIDDEN,
            )
        elif not requester.app_service.url:
            raise SynapseError(
                HTTPStatus.BAD_REQUEST,
                "The application service does not have a URL set",
                Codes.AS_PING_URL_NOT_SET,
            )

        content = parse_json_object_from_request(request)
        txn_id = content.get("transaction_id", None)

        start = time.monotonic()
        try:
            await self.as_api.ping(requester.app_service, txn_id)
        except RequestTimedOutError as e:
            raise SynapseError(
                HTTPStatus.GATEWAY_TIMEOUT,
                e.msg,
                Codes.AS_PING_CONNECTION_TIMEOUT,
            )
        except CodeMessageException as e:
            additional_fields: Dict[str, Any] = {"status": e.code}
            if isinstance(e, HttpResponseException):
                try:
                    additional_fields["body"] = e.response.decode("utf-8")
                except UnicodeDecodeError:
                    pass
            raise SynapseError(
                HTTPStatus.BAD_GATEWAY,
                f"HTTP {e.code} {e.msg}",
                Codes.AS_PING_BAD_STATUS,
                additional_fields=additional_fields,
            )
        except Exception as e:
            raise SynapseError(
                HTTPStatus.BAD_GATEWAY,
                f"{type(e).__name__}: {e}",
                Codes.AS_PING_CONNECTION_FAILED,
            )

        duration = time.monotonic() - start

        return HTTPStatus.OK, {"duration": int(duration * 1000)}


def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
    if hs.config.experimental.msc2659_enabled:
        AppservicePingRestServlet(hs).register(http_server)
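
For reference, a hedged sketch of exercising this endpoint from an appservice; the homeserver URL, appservice ID and token below are invented placeholders.

# Hypothetical call to the MSC2659 ping endpoint; URL, ID and token are placeholders.
import requests

resp = requests.post(
    "https://homeserver.example/_matrix/client/unstable"
    "/fi.mau.msc2659/appservice/my_appservice_id/ping",
    headers={"Authorization": "Bearer <as_token>"},
    json={"transaction_id": "txn-1"},
)
print(resp.status_code, resp.json())  # e.g. 200 {"duration": 123} on success

On failure, the servlet maps timeouts, bad upstream responses and connection errors to the `AS_PING_*` error codes shown above.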

@@ -956,7 +956,7 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
    if hs.config.worker.worker_app is None:
        EmailRegisterRequestTokenRestServlet(hs).register(http_server)
        MsisdnRegisterRequestTokenRestServlet(hs).register(http_server)
        UsernameAvailabilityRestServlet(hs).register(http_server)
        RegistrationSubmitTokenServlet(hs).register(http_server)
    UsernameAvailabilityRestServlet(hs).register(http_server)
    RegistrationTokenValidityRestServlet(hs).register(http_server)
    RegisterRestServlet(hs).register(http_server)

@@ -16,7 +16,7 @@ import logging
from http import HTTPStatus
from typing import TYPE_CHECKING, Tuple

from synapse.api.errors import Codes, NotFoundError, SynapseError
from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError
from synapse.http.server import HttpServer
from synapse.http.servlet import RestServlet, parse_json_object_from_request
from synapse.http.site import SynapseRequest
@@ -62,12 +62,18 @@ class ReportEventRestServlet(RestServlet):
                Codes.BAD_JSON,
            )

        event = await self._event_handler.get_event(
            requester.user, room_id, event_id, show_redacted=False
        )
        try:
            event = await self._event_handler.get_event(
                requester.user, room_id, event_id, show_redacted=False
            )
        except AuthError:
            # The event exists, but this user is not allowed to access this event.
            event = None

        if event is None:
            raise NotFoundError(
                "Unable to report event: it does not exist or you aren't able to see it."
                "Unable to report event: "
                "it does not exist or you aren't able to see it."
            )

        await self.store.add_event_report(

@@ -109,6 +109,8 @@ class VersionsRestServlet(RestServlet):
                    "org.matrix.msc3773": self.config.experimental.msc3773_enabled,
                    # Allows moderators to fetch redacted event content as described in MSC2815
                    "fi.mau.msc2815": self.config.experimental.msc2815_enabled,
                    # Adds a ping endpoint for appservices to check HS->AS connection
                    "fi.mau.msc2659": self.config.experimental.msc2659_enabled,
                    # Adds support for login token requests as per MSC3882
                    "org.matrix.msc3882": self.config.experimental.msc3882_enabled,
                    # Adds support for remotely enabling/disabling pushers, as per MSC3881
@@ -120,6 +122,8 @@ class VersionsRestServlet(RestServlet):
                    is not None,
                    # Adds support for relation-based redactions as per MSC3912.
                    "org.matrix.msc3912": self.config.experimental.msc3912_enabled,
                    # Adds support for unstable "intentional mentions" behaviour.
                    "org.matrix.msc3952_intentional_mentions": self.config.experimental.msc3952_intentional_mentions,
                },
            },
        )
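
A client can discover whether a homeserver advertises the new flag before calling the ping endpoint. A minimal sketch (the homeserver URL is a placeholder):

# Hypothetical feature check against /_matrix/client/versions; the URL is a placeholder.
import requests

versions = requests.get("https://homeserver.example/_matrix/client/versions").json()
if versions.get("unstable_features", {}).get("fi.mau.msc2659", False):
    print("Homeserver supports MSC2659 appservice pings")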

@@ -12,26 +12,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import errno
import fnmatch
import logging
import os
import re
import shutil
import sys
import traceback
from typing import TYPE_CHECKING, BinaryIO, Iterable, Optional, Tuple
from urllib.parse import urljoin, urlparse, urlsplit
from urllib.request import urlopen

import attr
from typing import TYPE_CHECKING

from twisted.internet.defer import Deferred
from twisted.internet.error import DNSLookupError

from synapse.api.errors import Codes, SynapseError
from synapse.http.client import SimpleHttpClient
from synapse.http.server import (
    DirectServeJsonResource,
    respond_with_json,
@@ -39,71 +22,13 @@ from synapse.http.server import (
)
from synapse.http.servlet import parse_integer, parse_string
from synapse.http.site import SynapseRequest
from synapse.logging.context import make_deferred_yieldable, run_in_background
from synapse.media._base import FileInfo, get_filename_from_headers
from synapse.media.media_storage import MediaStorage
from synapse.media.oembed import OEmbedProvider
from synapse.media.preview_html import decode_body, parse_html_to_open_graph
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.types import JsonDict, UserID
from synapse.util import json_encoder
from synapse.util.async_helpers import ObservableDeferred
from synapse.util.caches.expiringcache import ExpiringCache
from synapse.util.stringutils import random_string
from synapse.media.url_previewer import UrlPreviewer

if TYPE_CHECKING:
    from synapse.media.media_repository import MediaRepository
    from synapse.server import HomeServer

logger = logging.getLogger(__name__)

OG_TAG_NAME_MAXLEN = 50
OG_TAG_VALUE_MAXLEN = 1000

ONE_HOUR = 60 * 60 * 1000
ONE_DAY = 24 * ONE_HOUR
IMAGE_CACHE_EXPIRY_MS = 2 * ONE_DAY


@attr.s(slots=True, frozen=True, auto_attribs=True)
class DownloadResult:
    length: int
    uri: str
    response_code: int
    media_type: str
    download_name: Optional[str]
    expires: int
    etag: Optional[str]


@attr.s(slots=True, frozen=True, auto_attribs=True)
class MediaInfo:
    """
    Information parsed from downloading media being previewed.
    """

    # The Content-Type header of the response.
    media_type: str
    # The length (in bytes) of the downloaded media.
    media_length: int
    # The media filename, according to the server. This is parsed from the
    # returned headers, if possible.
    download_name: Optional[str]
    # The time of the preview.
    created_ts_ms: int
    # Information from the media storage provider about where the file is stored
    # on disk.
    filesystem_id: str
    filename: str
    # The URI being previewed.
    uri: str
    # The HTTP response code.
    response_code: int
    # The timestamp (in milliseconds) of when this preview expires.
    expires: int
    # The ETag header of the response.
    etag: Optional[str]


class PreviewUrlResource(DirectServeJsonResource):
    """
@@ -121,54 +46,6 @@ class PreviewUrlResource(DirectServeJsonResource):
    * The URL metadata must be stored somewhere, rather than just using Matrix
      itself to store the media.
    * Matrix cannot be used to distribute the metadata between homeservers.

    When Synapse is asked to preview a URL it does the following:

    1. Checks against a URL blacklist (defined as `url_preview_url_blacklist` in the
       config).
    2. Checks the URL against an in-memory cache and returns the result if it exists. (This
       is also used to de-duplicate processing of multiple in-flight requests at once.)
    3. Kicks off a background process to generate a preview:
       1. Checks URL and timestamp against the database cache and returns the result if it
          has not expired and was successful (a 2xx return code).
       2. Checks if the URL matches an oEmbed (https://oembed.com/) pattern. If it
          does, update the URL to download.
       3. Downloads the URL and stores it into a file via the media storage provider
          and saves the local media metadata.
       4. If the media is an image:
          1. Generates thumbnails.
          2. Generates an Open Graph response based on image properties.
       5. If the media is HTML:
          1. Decodes the HTML via the stored file.
          2. Generates an Open Graph response from the HTML.
          3. If a JSON oEmbed URL was found in the HTML via autodiscovery:
             1. Downloads the URL and stores it into a file via the media storage provider
                and saves the local media metadata.
             2. Converts the oEmbed response to an Open Graph response.
             3. Overrides any Open Graph data from the HTML with data from oEmbed.
          4. If an image exists in the Open Graph response:
             1. Downloads the URL and stores it into a file via the media storage
                provider and saves the local media metadata.
             2. Generates thumbnails.
             3. Updates the Open Graph response based on image properties.
       6. If the media is JSON and an oEmbed URL was found:
          1. Converts the oEmbed response to an Open Graph response.
          2. If a thumbnail or image is in the oEmbed response:
             1. Downloads the URL and stores it into a file via the media storage
                provider and saves the local media metadata.
             2. Generates thumbnails.
             3. Updates the Open Graph response based on image properties.
       7. Stores the result in the database cache.
    4. Returns the result.

    If any additional requests (e.g. from oEmbed autodiscovery, step 5.3, or
    image thumbnailing, step 5.4 or 6.4) fail, then the URL preview as a whole
    does not fail. As much information as possible is returned.

    The in-memory cache expires after 1 hour.

    Expired entries in the database cache (and their associated media files) are
    deleted every 10 seconds. The default expiration time is 1 hour from download.
    """

    isLeaf = True
@@ -183,48 +60,10 @@ class PreviewUrlResource(DirectServeJsonResource):

        self.auth = hs.get_auth()
        self.clock = hs.get_clock()
        self.filepaths = media_repo.filepaths
        self.max_spider_size = hs.config.media.max_spider_size
        self.server_name = hs.hostname
        self.store = hs.get_datastores().main
        self.client = SimpleHttpClient(
            hs,
            treq_args={"browser_like_redirects": True},
            ip_whitelist=hs.config.media.url_preview_ip_range_whitelist,
            ip_blacklist=hs.config.media.url_preview_ip_range_blacklist,
            use_proxy=True,
        )
        self.media_repo = media_repo
        self.primary_base_path = media_repo.primary_base_path
        self.media_storage = media_storage

        self._oembed = OEmbedProvider(hs)

        # We run the background jobs if we're the instance specified (or no
        # instance is specified, where we assume there is only one instance
        # serving media).
        instance_running_jobs = hs.config.media.media_instance_running_background_jobs
        self._worker_run_media_background_jobs = (
            instance_running_jobs is None
            or instance_running_jobs == hs.get_instance_name()
        )

        self.url_preview_url_blacklist = hs.config.media.url_preview_url_blacklist
        self.url_preview_accept_language = hs.config.media.url_preview_accept_language

        # memory cache mapping urls to an ObservableDeferred returning
        # JSON-encoded OG metadata
        self._cache: ExpiringCache[str, ObservableDeferred] = ExpiringCache(
            cache_name="url_previews",
            clock=self.clock,
            # don't spider URLs more often than once an hour
            expiry_ms=ONE_HOUR,
        )

        if self._worker_run_media_background_jobs:
            self._cleaner_loop = self.clock.looping_call(
                self._start_expire_url_cache_data, 10 * 1000
            )
        self._url_previewer = UrlPreviewer(hs, media_repo, media_storage)

    async def _async_render_OPTIONS(self, request: SynapseRequest) -> None:
        request.setHeader(b"Allow", b"OPTIONS, GET")
@@ -238,632 +77,5 @@ class PreviewUrlResource(DirectServeJsonResource):
        if ts is None:
            ts = self.clock.time_msec()

        # XXX: we could move this into _do_preview if we wanted.
        url_tuple = urlsplit(url)
        for entry in self.url_preview_url_blacklist:
            match = True
            for attrib in entry:
                pattern = entry[attrib]
                value = getattr(url_tuple, attrib)
                logger.debug(
                    "Matching attrib '%s' with value '%s' against pattern '%s'",
                    attrib,
                    value,
                    pattern,
                )

                if value is None:
                    match = False
                    continue

                # Some attributes might not be parsed as strings by urlsplit (such as the
                # port, which is parsed as an int). Because we use match functions that
                # expect strings, we want to make sure that's what we give them.
                value_str = str(value)

                if pattern.startswith("^"):
                    if not re.match(pattern, value_str):
                        match = False
                        continue
                else:
                    if not fnmatch.fnmatch(value_str, pattern):
                        match = False
                        continue
            if match:
                logger.warning("URL %s blocked by url_blacklist entry %s", url, entry)
                raise SynapseError(
                    403, "URL blocked by url pattern blacklist entry", Codes.UNKNOWN
                )

        # the in-memory cache:
        # * ensures that only one request is active at a time
        # * takes load off the DB for the thundering herds
        # * also caches any failures (unlike the DB) so we don't keep
        #   requesting the same endpoint

        observable = self._cache.get(url)

        if not observable:
            download = run_in_background(self._do_preview, url, requester.user, ts)
            observable = ObservableDeferred(download, consumeErrors=True)
            self._cache[url] = observable
        else:
            logger.info("Returning cached response")

        og = await make_deferred_yieldable(observable.observe())
        og = await self._url_previewer.preview(url, requester.user, ts)
        respond_with_json_bytes(request, 200, og, send_cors=True)

    async def _do_preview(self, url: str, user: UserID, ts: int) -> bytes:
        """Check the db, and download the URL and build a preview

        Args:
            url: The URL to preview.
            user: The user requesting the preview.
            ts: The timestamp requested for the preview.

        Returns:
            json-encoded og data
        """
        # check the URL cache in the DB (which will also provide us with
        # historical previews, if we have any)
        cache_result = await self.store.get_url_cache(url, ts)
        if (
            cache_result
            and cache_result["expires_ts"] > ts
            and cache_result["response_code"] / 100 == 2
        ):
            # It may be stored as text in the database, not as bytes (such as
            # PostgreSQL). If so, encode it back before handing it on.
            og = cache_result["og"]
            if isinstance(og, str):
                og = og.encode("utf8")
            return og

        # If this URL can be accessed via oEmbed, use that instead.
        url_to_download = url
        oembed_url = self._oembed.get_oembed_url(url)
        if oembed_url:
            url_to_download = oembed_url

        media_info = await self._handle_url(url_to_download, user)

        logger.debug("got media_info of '%s'", media_info)

        # The number of milliseconds that the response should be considered valid.
        expiration_ms = media_info.expires
        author_name: Optional[str] = None

        if _is_media(media_info.media_type):
            file_id = media_info.filesystem_id
            dims = await self.media_repo._generate_thumbnails(
                None, file_id, file_id, media_info.media_type, url_cache=True
            )

            og = {
                "og:description": media_info.download_name,
                "og:image": f"mxc://{self.server_name}/{media_info.filesystem_id}",
                "og:image:type": media_info.media_type,
                "matrix:image:size": media_info.media_length,
            }

            if dims:
                og["og:image:width"] = dims["width"]
                og["og:image:height"] = dims["height"]
            else:
                logger.warning("Couldn't get dims for %s" % url)

            # define our OG response for this media
        elif _is_html(media_info.media_type):
            # TODO: somehow stop a big HTML tree from exploding synapse's RAM

            with open(media_info.filename, "rb") as file:
                body = file.read()

            tree = decode_body(body, media_info.uri, media_info.media_type)
            if tree is not None:
                # Check if this HTML document points to oEmbed information and
                # defer to that.
                oembed_url = self._oembed.autodiscover_from_html(tree)
                og_from_oembed: JsonDict = {}
                if oembed_url:
                    try:
                        oembed_info = await self._handle_url(
                            oembed_url, user, allow_data_urls=True
                        )
                    except Exception as e:
                        # Fetching the oEmbed info failed, don't block the entire URL preview.
                        logger.warning(
                            "oEmbed fetch failed during URL preview: %s errored with %s",
                            oembed_url,
                            e,
                        )
                    else:
                        (
                            og_from_oembed,
                            author_name,
                            expiration_ms,
                        ) = await self._handle_oembed_response(
                            url, oembed_info, expiration_ms
                        )

                # Parse Open Graph information from the HTML in case the oEmbed
                # response failed or is incomplete.
                og_from_html = parse_html_to_open_graph(tree)

                # Compile the Open Graph response by using the scraped
                # information from the HTML and overlaying any information
                # from the oEmbed response.
                og = {**og_from_html, **og_from_oembed}

                await self._precache_image_url(user, media_info, og)
            else:
                og = {}

        elif oembed_url:
            # Handle the oEmbed information.
            og, author_name, expiration_ms = await self._handle_oembed_response(
                url, media_info, expiration_ms
            )
            await self._precache_image_url(user, media_info, og)

        else:
            logger.warning("Failed to find any OG data in %s", url)
            og = {}

        # If we don't have a title but we have author_name, copy it as
        # title
        if not og.get("og:title") and author_name:
            og["og:title"] = author_name

        # filter out any stupidly long values
        keys_to_remove = []
        for k, v in og.items():
            # values can be numeric as well as strings, hence the cast to str
            if len(k) > OG_TAG_NAME_MAXLEN or len(str(v)) > OG_TAG_VALUE_MAXLEN:
                logger.warning(
                    "Pruning overlong tag %s from OG data", k[:OG_TAG_NAME_MAXLEN]
                )
                keys_to_remove.append(k)
        for k in keys_to_remove:
            del og[k]

        logger.debug("Calculated OG for %s as %s", url, og)

        jsonog = json_encoder.encode(og)

        # Cap the amount of time to consider a response valid.
        expiration_ms = min(expiration_ms, ONE_DAY)

        # store OG in history-aware DB cache
        await self.store.store_url_cache(
            url,
            media_info.response_code,
            media_info.etag,
            media_info.created_ts_ms + expiration_ms,
            jsonog,
            media_info.filesystem_id,
            media_info.created_ts_ms,
        )

        return jsonog.encode("utf8")

    async def _download_url(self, url: str, output_stream: BinaryIO) -> DownloadResult:
        """
        Fetches a remote URL and parses the headers.

        Args:
            url: The URL to fetch.
            output_stream: The stream to write the content to.

        Returns:
            A tuple of:
                Media length, URL downloaded, the HTTP response code,
                the media type, the downloaded file name, the number of
                milliseconds the result is valid for, the etag header.
        """

        try:
            logger.debug("Trying to get preview for url '%s'", url)
            length, headers, uri, code = await self.client.get_file(
                url,
                output_stream=output_stream,
                max_size=self.max_spider_size,
                headers={
                    b"Accept-Language": self.url_preview_accept_language,
                    # Use a custom user agent for the preview because some sites will only return
                    # Open Graph metadata to crawler user agents. Omit the Synapse version
                    # string to avoid leaking information.
                    b"User-Agent": [
                        "Synapse (bot; +https://github.com/matrix-org/synapse)"
                    ],
                },
                is_allowed_content_type=_is_previewable,
            )
        except SynapseError:
            # Pass SynapseErrors through directly, so that the servlet
            # handler will return a SynapseError to the client instead of
            # blank data or a 500.
            raise
        except DNSLookupError:
            # DNS lookup returned no results
            # Note: This will also be the case if one of the resolved IP
            # addresses is blacklisted
            raise SynapseError(
                502,
                "DNS resolution failure during URL preview generation",
                Codes.UNKNOWN,
            )
        except Exception as e:
            # FIXME: pass through 404s and other error messages nicely
            logger.warning("Error downloading %s: %r", url, e)

            raise SynapseError(
                500,
                "Failed to download content: %s"
                % (traceback.format_exception_only(sys.exc_info()[0], e),),
                Codes.UNKNOWN,
            )

        if b"Content-Type" in headers:
            media_type = headers[b"Content-Type"][0].decode("ascii")
        else:
            media_type = "application/octet-stream"

        download_name = get_filename_from_headers(headers)

        # FIXME: we should calculate a proper expiration based on the
        # Cache-Control and Expire headers. But for now, assume 1 hour.
        expires = ONE_HOUR
        etag = headers[b"ETag"][0].decode("ascii") if b"ETag" in headers else None

        return DownloadResult(
            length, uri, code, media_type, download_name, expires, etag
        )

    async def _parse_data_url(
        self, url: str, output_stream: BinaryIO
    ) -> DownloadResult:
        """
        Parses a data: URL.

        Args:
            url: The URL to parse.
            output_stream: The stream to write the content to.

        Returns:
            A tuple of:
                Media length, URL downloaded, the HTTP response code,
                the media type, the downloaded file name, the number of
                milliseconds the result is valid for, the etag header.
        """

        try:
            logger.debug("Trying to parse data url '%s'", url)
            with urlopen(url) as url_info:
                # TODO: Can this be more efficient?
                output_stream.write(url_info.read())
        except Exception as e:
            logger.warning("Error parsing data: URL %s: %r", url, e)

            raise SynapseError(
                500,
                "Failed to parse data URL: %s"
                % (traceback.format_exception_only(sys.exc_info()[0], e),),
                Codes.UNKNOWN,
            )

        return DownloadResult(
            # Read back the length that has been written.
            length=output_stream.tell(),
            uri=url,
            # If it was parsed, consider this a 200 OK.
            response_code=200,
            # urlopen shoves the media-type from the data URL into the content type
            # header object.
            media_type=url_info.headers.get_content_type(),
            # Some features are not supported by data: URLs.
            download_name=None,
            expires=ONE_HOUR,
            etag=None,
        )

    async def _handle_url(
        self, url: str, user: UserID, allow_data_urls: bool = False
    ) -> MediaInfo:
        """
        Fetches content from a URL and parses the result to generate a MediaInfo.

        It uses the media storage provider to persist the fetched content and
        stores the mapping into the database.

        Args:
            url: The URL to fetch.
            user: The user who has requested this URL.
            allow_data_urls: True if data URLs should be allowed.

        Returns:
            A MediaInfo object describing the fetched content.
        """

        # TODO: we should probably honour robots.txt... except in practice
        # we're most likely being explicitly triggered by a human rather than a
        # bot, so are we really a robot?

        file_id = datetime.date.today().isoformat() + "_" + random_string(16)

        file_info = FileInfo(server_name=None, file_id=file_id, url_cache=True)

        with self.media_storage.store_into_file(file_info) as (f, fname, finish):
            if url.startswith("data:"):
                if not allow_data_urls:
                    raise SynapseError(
                        500, "Previewing of data: URLs is forbidden", Codes.UNKNOWN
                    )

                download_result = await self._parse_data_url(url, f)
            else:
                download_result = await self._download_url(url, f)

            await finish()

@@ -805,7 +805,6 @@ class EventsWorkerStore(SQLBaseStore):
        # the events have been redacted, and if so pulling the redaction event
        # out of the database to check it.
        #
        missing_events = {}
        try:
            # Try to fetch from any external cache. We already checked the
            # in-memory cache above.

@@ -472,12 +472,11 @@ class RelationsWorkerStore(SQLBaseStore):
            the event will map to None.
        """

        # We only allow edits for `m.room.message` events that have the same sender
        # and event type. We can't assert these things during regular event auth so
        # we have to do the checks post hoc.
        # We only allow edits for events that have the same sender and event type.
        # We can't assert these things during regular event auth so we have to do
        # the checks post hoc.

        # Fetches latest edit that has the same type and sender as the
        # original, and is an `m.room.message`.
        # Fetches latest edit that has the same type and sender as the original.
        if isinstance(self.database_engine, PostgresEngine):
            # The `DISTINCT ON` clause will pick the *first* row it encounters,
            # so ordering by origin server ts + event ID desc will ensure we get
@@ -493,7 +492,6 @@ class RelationsWorkerStore(SQLBaseStore):
                WHERE
                    %s
                    AND relation_type = ?
                    AND edit.type = 'm.room.message'
                ORDER by original.event_id DESC, edit.origin_server_ts DESC, edit.event_id DESC
            """
        else:
@@ -512,7 +510,6 @@ class RelationsWorkerStore(SQLBaseStore):
                WHERE
                    %s
                    AND relation_type = ?
                    AND edit.type = 'm.room.message'
                ORDER by edit.origin_server_ts, edit.event_id
            """

@@ -1,63 +0,0 @@
# Copyright 2020 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Tuple

from twisted.internet.address import IPv4Address
from twisted.internet.interfaces import IProtocol
from twisted.test.proto_helpers import MemoryReactor, StringTransport

from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory
from synapse.server import HomeServer
from synapse.util import Clock

from tests.unittest import HomeserverTestCase


class RemoteServerUpTestCase(HomeserverTestCase):
    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
        self.factory = ReplicationStreamProtocolFactory(hs)

    def _make_client(self) -> Tuple[IProtocol, StringTransport]:
        """Create a new direct TCP replication connection"""

        proto = self.factory.buildProtocol(IPv4Address("TCP", "127.0.0.1", 0))
        transport = StringTransport()
        proto.makeConnection(transport)

        # We can safely ignore the commands received during connection.
        self.pump()
        transport.clear()

        return proto, transport

    def test_relay(self) -> None:
        """Test that Synapse will relay REMOTE_SERVER_UP commands to all
        other connections, but not the one that sent it.
        """

        proto1, transport1 = self._make_client()

        # We shouldn't receive an echo.
        proto1.dataReceived(b"REMOTE_SERVER_UP example.com\n")
        self.pump()
        self.assertEqual(transport1.value(), b"")

        # But we should see an echo if we connect another client
        proto2, transport2 = self._make_client()
        proto1.dataReceived(b"REMOTE_SERVER_UP example.com\n")

        self.pump()
        self.assertEqual(transport1.value(), b"")
        self.assertEqual(transport2.value(), b"REMOTE_SERVER_UP example.com\n")

@@ -402,6 +402,21 @@ class DeleteRoomTestCase(unittest.HomeserverTestCase):
        # Assert we can no longer peek into the room
        self._assert_peek(self.room_id, expect_code=403)

    def test_room_delete_send(self) -> None:
        """Test that sending into a deleted room returns a 403"""
        channel = self.make_request(
            "DELETE",
            self.url,
            content={},
            access_token=self.admin_user_tok,
        )

        self.assertEqual(200, channel.code, msg=channel.json_body)

        self.helper.send(
            self.room_id, "test message", expect_code=403, tok=self.other_user_tok
        )

    def _is_blocked(self, room_id: str, expect: bool = True) -> None:
        """Assert that the room is blocked or not"""
        d = self.store.is_room_blocked(room_id)

@@ -84,6 +84,48 @@ class ReportEventTestCase(unittest.HomeserverTestCase):
            access_token=self.other_user_tok,
        )
        self.assertEqual(404, channel.code, msg=channel.result["body"])
        self.assertEqual(
            "Unable to report event: it does not exist or you aren't able to see it.",
            channel.json_body["error"],
            msg=channel.result["body"],
        )

    def test_cannot_report_event_if_not_in_room(self) -> None:
        """
        Tests that we don't accept event reports for events that exist, but which
        the reporter should not be able to view (because they are not in the room).
        """
        # Have the admin user create a room (the "other" user will not join this room).
        new_room_id = self.helper.create_room_as(tok=self.admin_user_tok)

        # Have the admin user send an event in this room.
        response = self.helper.send_event(
            new_room_id,
            "m.room.message",
            content={
                "msgtype": "m.text",
                "body": "This event has some bad words in it! Flip!",
            },
            tok=self.admin_user_tok,
        )
        event_id = response["event_id"]

        # Have the "other" user attempt to report it. Perhaps they found the event ID
        # in a screenshot or something...
        channel = self.make_request(
            "POST",
            f"rooms/{new_room_id}/report/{event_id}",
            {"reason": "I'm not in this room but I have opinions anyways!"},
            access_token=self.other_user_tok,
        )

        # The "other" user is not in the room, so their report should be rejected.
        self.assertEqual(404, channel.code, msg=channel.result["body"])
        self.assertEqual(
            "Unable to report event: it does not exist or you aren't able to see it.",
            channel.json_body["error"],
            msg=channel.result["body"],
        )

    def _assert_status(self, response_status: int, data: JsonDict) -> None:
        channel = self.make_request(

@@ -26,8 +26,8 @@ from twisted.internet.interfaces import IAddress, IResolutionReceiver
from twisted.test.proto_helpers import AccumulatingProtocol, MemoryReactor

from synapse.config.oembed import OEmbedEndpointConfig
from synapse.media.url_previewer import IMAGE_CACHE_EXPIRY_MS
from synapse.rest.media.media_repository_resource import MediaRepositoryResource
from synapse.rest.media.preview_url_resource import IMAGE_CACHE_EXPIRY_MS
from synapse.server import HomeServer
from synapse.types import JsonDict
from synapse.util import Clock
@@ -36,7 +36,6 @@ from synapse.util.stringutils import parse_and_validate_mxc_uri
from tests import unittest
from tests.server import FakeTransport
from tests.test_utils import SMALL_PNG
from tests.utils import MockClock

try:
    import lxml
@@ -117,8 +116,9 @@ class URLPreviewTests(unittest.HomeserverTestCase):
        return hs

    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
        self.media_repo = hs.get_media_repository_resource()
        self.preview_url = self.media_repo.children[b"preview_url"]
        self.media_repo = hs.get_media_repository()
        media_repo_resource = hs.get_media_repository_resource()
        self.preview_url = media_repo_resource.children[b"preview_url"]

        self.lookups: Dict[str, Any] = {}

@@ -193,9 +193,9 @@ class URLPreviewTests(unittest.HomeserverTestCase):
        )

        # Clear the in-memory cache
        self.assertIn("http://matrix.org", self.preview_url._cache)
        self.preview_url._cache.pop("http://matrix.org")
        self.assertNotIn("http://matrix.org", self.preview_url._cache)
        self.assertIn("http://matrix.org", self.preview_url._url_previewer._cache)
        self.preview_url._url_previewer._cache.pop("http://matrix.org")
        self.assertNotIn("http://matrix.org", self.preview_url._url_previewer._cache)

        # Check the database cache returns the correct response
        channel = self.make_request(
@@ -1073,7 +1073,7 @@ class URLPreviewTests(unittest.HomeserverTestCase):
        """Test that files are not stored in or fetched from storage providers."""
        host, media_id = self._download_image()

        rel_file_path = self.preview_url.filepaths.url_cache_filepath_rel(media_id)
        rel_file_path = self.media_repo.filepaths.url_cache_filepath_rel(media_id)
        media_store_path = os.path.join(self.media_store_path, rel_file_path)
        storage_provider_path = os.path.join(self.storage_path, rel_file_path)

@@ -1116,7 +1116,7 @@ class URLPreviewTests(unittest.HomeserverTestCase):
        host, media_id = self._download_image()

        rel_thumbnail_path = (
            self.preview_url.filepaths.url_cache_thumbnail_directory_rel(media_id)
            self.media_repo.filepaths.url_cache_thumbnail_directory_rel(media_id)
        )
        media_store_thumbnail_path = os.path.join(
            self.media_store_path, rel_thumbnail_path
@@ -1143,7 +1143,7 @@ class URLPreviewTests(unittest.HomeserverTestCase):
        self.assertEqual(channel.code, 200)

        # Remove the original, otherwise thumbnails will regenerate
        rel_file_path = self.preview_url.filepaths.url_cache_filepath_rel(media_id)
        rel_file_path = self.media_repo.filepaths.url_cache_filepath_rel(media_id)
        media_store_path = os.path.join(self.media_store_path, rel_file_path)
        os.remove(media_store_path)

@@ -1166,26 +1166,24 @@ class URLPreviewTests(unittest.HomeserverTestCase):

    def test_cache_expiry(self) -> None:
        """Test that URL cache files and thumbnails are cleaned up properly on expiry."""
        self.preview_url.clock = MockClock()

        _host, media_id = self._download_image()

        file_path = self.preview_url.filepaths.url_cache_filepath(media_id)
        file_dirs = self.preview_url.filepaths.url_cache_filepath_dirs_to_delete(
        file_path = self.media_repo.filepaths.url_cache_filepath(media_id)
        file_dirs = self.media_repo.filepaths.url_cache_filepath_dirs_to_delete(
            media_id
        )
        thumbnail_dir = self.preview_url.filepaths.url_cache_thumbnail_directory(
        thumbnail_dir = self.media_repo.filepaths.url_cache_thumbnail_directory(
            media_id
        )
        thumbnail_dirs = self.preview_url.filepaths.url_cache_thumbnail_dirs_to_delete(
        thumbnail_dirs = self.media_repo.filepaths.url_cache_thumbnail_dirs_to_delete(
            media_id
        )

        self.assertTrue(os.path.isfile(file_path))
        self.assertTrue(os.path.isdir(thumbnail_dir))

        self.preview_url.clock.advance_time_msec(IMAGE_CACHE_EXPIRY_MS + 1)
        self.get_success(self.preview_url._expire_url_cache_data())
        self.reactor.advance(IMAGE_CACHE_EXPIRY_MS * 1000 + 1)
        self.get_success(self.preview_url._url_previewer._expire_url_cache_data())

        for path in [file_path] + file_dirs + [thumbnail_dir] + thumbnail_dirs:
            self.assertFalse(

@@ -266,6 +266,10 @@ class OptionsResourceTests(unittest.TestCase):
            [b"X-Requested-With, Content-Type, Authorization, Date"],
            "has correct CORS Headers header",
        )
        self.assertEqual(
            channel.headers.getRawHeaders(b"Access-Control-Expose-Headers"),
            [b"Synapse-Trace-Id"],
        )

    def _check_cors_msc3886_headers(self, channel: FakeChannel) -> None:
        # Ensure the correct CORS headers have been added