diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000000..ca333961f3 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,12 @@ +[run] +branch = True +parallel = True +source = synapse + +[paths] +source= + coverage + +[report] +precision = 2 +ignore_errors = True diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE/BUG_REPORT.md similarity index 62% rename from .github/ISSUE_TEMPLATE.md rename to .github/ISSUE_TEMPLATE/BUG_REPORT.md index 21acb3202a..756759c2d8 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE/BUG_REPORT.md @@ -1,3 +1,9 @@ +--- +name: Bug report +about: Create a report to help us improve + +--- + marks will be invisible in the report. --> ### Description -Describe here the problem that you are experiencing, or the feature you are requesting. + ### Steps to reproduce -- For bugs, list the steps +- list the steps - that reproduce the bug - using hyphens as bullet points + +``` (three backticks, on a line on their own), so that they are formatted legibly. +--> ### Version information -- **Homeserver**: Was this issue identified on matrix.org or another homeserver? + +- **Homeserver**: If not matrix.org: -- **Version**: What version of Synapse is running? -- **Install method**: package manager/git clone/pip -- **Platform**: Tell us about the environment in which your homeserver is operating - - distro, hardware, if it's running in a vm/container, etc. 
+- **Version**: + +- **Install method**: + + +- **Platform**: + diff --git a/.github/ISSUE_TEMPLATE/FEATURE_REQUEST.md b/.github/ISSUE_TEMPLATE/FEATURE_REQUEST.md new file mode 100644 index 0000000000..150a46f505 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/FEATURE_REQUEST.md @@ -0,0 +1,9 @@ +--- +name: Feature request +about: Suggest an idea for this project + +--- + +**Description:** + + diff --git a/.github/ISSUE_TEMPLATE/SUPPORT_REQUEST.md b/.github/ISSUE_TEMPLATE/SUPPORT_REQUEST.md new file mode 100644 index 0000000000..77581596c4 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/SUPPORT_REQUEST.md @@ -0,0 +1,9 @@ +--- +name: Support request +about: I need support for Synapse + +--- + +# Please ask for support in [**#matrix:matrix.org**](https://matrix.to/#/#matrix:matrix.org) + +## Don't file an issue as a support request. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000000..aa883ba505 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,7 @@ +### Pull Request Checklist + + + +* [ ] Pull request is based on the develop branch +* [ ] Pull request includes a [changelog file](CONTRIBUTING.rst#changelog) +* [ ] Pull request includes a [sign off](CONTRIBUTING.rst#sign-off) diff --git a/.github/SUPPORT.md b/.github/SUPPORT.md new file mode 100644 index 0000000000..7a4244f673 --- /dev/null +++ b/.github/SUPPORT.md @@ -0,0 +1,3 @@ +[**#matrix:matrix.org**](https://matrix.to/#/#matrix:matrix.org) is the official support room for Matrix, and can be accessed by any client from https://matrix.org/docs/projects/try-matrix-now.html + +It can also be accessed via IRC bridge at irc://irc.freenode.net/matrix or on the web here: https://webchat.freenode.net/?channels=matrix diff --git a/CHANGES.md b/CHANGES.md index 8302610585..1c3d575c37 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,64 @@ +Synapse 0.33.9 (2018-11-19) +=========================== + +No significant changes. 
+ + +Synapse 0.33.9rc1 (2018-11-14) +============================== + +Features +-------- + +- Include flags to optionally add `m.login.terms` to the registration flow when consent tracking is enabled. ([\#4004](https://github.com/matrix-org/synapse/issues/4004), [\#4133](https://github.com/matrix-org/synapse/issues/4133), [\#4142](https://github.com/matrix-org/synapse/issues/4142), [\#4184](https://github.com/matrix-org/synapse/issues/4184)) +- Support for replacing rooms with new ones ([\#4091](https://github.com/matrix-org/synapse/issues/4091), [\#4099](https://github.com/matrix-org/synapse/issues/4099), [\#4100](https://github.com/matrix-org/synapse/issues/4100), [\#4101](https://github.com/matrix-org/synapse/issues/4101)) + + +Bugfixes +-------- + +- Fix exceptions when using the email mailer on Python 3. ([\#4095](https://github.com/matrix-org/synapse/issues/4095)) +- Fix e2e key backup with more than 9 backup versions ([\#4113](https://github.com/matrix-org/synapse/issues/4113)) +- Searches that request profile info now no longer fail with a 500. ([\#4122](https://github.com/matrix-org/synapse/issues/4122)) +- fix return code of empty key backups ([\#4123](https://github.com/matrix-org/synapse/issues/4123)) +- If the typing stream ID goes backwards (as on a worker when the master restarts), the worker's typing handler will no longer erroneously report rooms containing new typing events. ([\#4127](https://github.com/matrix-org/synapse/issues/4127)) +- Fix table lock of device_lists_remote_cache which could freeze the application ([\#4132](https://github.com/matrix-org/synapse/issues/4132)) +- Fix exception when using state res v2 algorithm ([\#4135](https://github.com/matrix-org/synapse/issues/4135)) +- Generating the user consent URI no longer fails on Python 3. 
([\#4140](https://github.com/matrix-org/synapse/issues/4140), [\#4163](https://github.com/matrix-org/synapse/issues/4163)) +- Loading URL previews from the DB cache on Postgres will no longer cause Unicode type errors when responding to the request, and URL previews will no longer fail if the remote server returns a Content-Type header with the charset in quotes. ([\#4157](https://github.com/matrix-org/synapse/issues/4157)) +- The hash_password script now works on Python 3. ([\#4161](https://github.com/matrix-org/synapse/issues/4161)) +- Fix noop checks when updating device keys, reducing spurious device list update notifications. ([\#4164](https://github.com/matrix-org/synapse/issues/4164)) + + +Deprecations and Removals +------------------------- + +- The disused and un-specced identicon generator has been removed. ([\#4106](https://github.com/matrix-org/synapse/issues/4106)) +- The obsolete and non-functional /pull federation endpoint has been removed. ([\#4118](https://github.com/matrix-org/synapse/issues/4118)) +- The deprecated v1 key exchange endpoints have been removed. ([\#4119](https://github.com/matrix-org/synapse/issues/4119)) +- Synapse will no longer fetch keys using the fallback deprecated v1 key exchange method and will now always use v2. ([\#4120](https://github.com/matrix-org/synapse/issues/4120)) + + +Internal Changes +---------------- + +- Fix build of Docker image with docker-compose ([\#3778](https://github.com/matrix-org/synapse/issues/3778)) +- Delete unreferenced state groups during history purge ([\#4006](https://github.com/matrix-org/synapse/issues/4006)) +- The "Received rdata" log messages on workers are now logged at DEBUG, not INFO. 
([\#4108](https://github.com/matrix-org/synapse/issues/4108)) +- Reduce replication traffic for device lists ([\#4109](https://github.com/matrix-org/synapse/issues/4109)) +- Fix `synapse_replication_tcp_protocol_*_commands` metric label to be full command name, rather than just the first character ([\#4110](https://github.com/matrix-org/synapse/issues/4110)) +- Log some bits about room creation ([\#4121](https://github.com/matrix-org/synapse/issues/4121)) +- Fix `tox` failure on old systems ([\#4124](https://github.com/matrix-org/synapse/issues/4124)) +- Add STATE_V2_TEST room version ([\#4128](https://github.com/matrix-org/synapse/issues/4128)) +- Clean up event accesses and tests ([\#4137](https://github.com/matrix-org/synapse/issues/4137)) +- The default logging config will now set an explicit log file encoding of UTF-8. ([\#4138](https://github.com/matrix-org/synapse/issues/4138)) +- Add helper functions for getting prev and auth events of an event ([\#4139](https://github.com/matrix-org/synapse/issues/4139)) +- Add some tests for the HTTP pusher. ([\#4149](https://github.com/matrix-org/synapse/issues/4149)) +- add purge_history.sh and purge_remote_media.sh scripts to contrib/ ([\#4155](https://github.com/matrix-org/synapse/issues/4155)) +- HTTP tests have been refactored to contain less boilerplate. 
([\#4156](https://github.com/matrix-org/synapse/issues/4156)) +- Drop incoming events from federation for unknown rooms ([\#4165](https://github.com/matrix-org/synapse/issues/4165)) + + Synapse 0.33.8 (2018-11-01) =========================== diff --git a/MANIFEST.in b/MANIFEST.in index 25cdf0a61b..d0e49713da 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -34,6 +34,7 @@ prune .github prune demo/etc prune docker prune .circleci +prune .coveragerc exclude jenkins* recursive-exclude jenkins *.sh diff --git a/README.rst b/README.rst index 9165db8319..e52b776902 100644 --- a/README.rst +++ b/README.rst @@ -142,7 +142,7 @@ Installing prerequisites on openSUSE:: Installing prerequisites on OpenBSD:: doas pkg_add python libffi py-pip py-setuptools sqlite3 py-virtualenv \ - libxslt + libxslt jpeg To install the Synapse homeserver run:: @@ -729,9 +729,10 @@ port: .. __: `key_management`_ -* Synapse does not currently support SNI on the federation protocol - (`bug #1491 `_), which - means that using name-based virtual hosting is unreliable. +* Until v0.33.3, Synapse did not support SNI on the federation port + (`bug #1491 `_). This bug + is now fixed, but means that federating with older servers can be unreliable + when using name-based virtual hosting. 
Furthermore, a number of the normal reasons for using a reverse-proxy do not apply: diff --git a/changelog.d/3778.misc b/changelog.d/3778.misc deleted file mode 100644 index b78a2c9f42..0000000000 --- a/changelog.d/3778.misc +++ /dev/null @@ -1 +0,0 @@ -Fix build of Docker image with docker-compose diff --git a/changelog.d/3830.feature b/changelog.d/3830.feature new file mode 100644 index 0000000000..af472cf763 --- /dev/null +++ b/changelog.d/3830.feature @@ -0,0 +1 @@ +Add option to track MAU stats (but not limit people) diff --git a/changelog.d/4004.feature b/changelog.d/4004.feature deleted file mode 100644 index 89975f4c6e..0000000000 --- a/changelog.d/4004.feature +++ /dev/null @@ -1 +0,0 @@ -Include flags to optionally add `m.login.terms` to the registration flow when consent tracking is enabled. diff --git a/changelog.d/4006.misc b/changelog.d/4006.misc deleted file mode 100644 index 35ffa1c2d2..0000000000 --- a/changelog.d/4006.misc +++ /dev/null @@ -1 +0,0 @@ -Delete unreferenced state groups during history purge diff --git a/changelog.d/4091.feature b/changelog.d/4091.feature deleted file mode 100644 index a3f7dbdcdd..0000000000 --- a/changelog.d/4091.feature +++ /dev/null @@ -1 +0,0 @@ -Support for replacing rooms with new ones diff --git a/changelog.d/4095.bugfix b/changelog.d/4095.bugfix deleted file mode 100644 index 76ee7148c2..0000000000 --- a/changelog.d/4095.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix exceptions when using the email mailer on Python 3. 
diff --git a/changelog.d/4099.feature b/changelog.d/4099.feature deleted file mode 100644 index a3f7dbdcdd..0000000000 --- a/changelog.d/4099.feature +++ /dev/null @@ -1 +0,0 @@ -Support for replacing rooms with new ones diff --git a/changelog.d/4100.feature b/changelog.d/4100.feature deleted file mode 100644 index a3f7dbdcdd..0000000000 --- a/changelog.d/4100.feature +++ /dev/null @@ -1 +0,0 @@ -Support for replacing rooms with new ones diff --git a/changelog.d/4101.feature b/changelog.d/4101.feature deleted file mode 100644 index a3f7dbdcdd..0000000000 --- a/changelog.d/4101.feature +++ /dev/null @@ -1 +0,0 @@ -Support for replacing rooms with new ones diff --git a/changelog.d/4106.removal b/changelog.d/4106.removal deleted file mode 100644 index 7e63208daa..0000000000 --- a/changelog.d/4106.removal +++ /dev/null @@ -1 +0,0 @@ -The disused and un-specced identicon generator has been removed. diff --git a/changelog.d/4108.misc b/changelog.d/4108.misc deleted file mode 100644 index 85810c3d83..0000000000 --- a/changelog.d/4108.misc +++ /dev/null @@ -1 +0,0 @@ -The "Received rdata" log messages on workers is now logged at DEBUG, not INFO. diff --git a/changelog.d/4109.misc b/changelog.d/4109.misc deleted file mode 100644 index 566c683119..0000000000 --- a/changelog.d/4109.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce replication traffic for device lists diff --git a/changelog.d/4110.misc b/changelog.d/4110.misc deleted file mode 100644 index a50327ae34..0000000000 --- a/changelog.d/4110.misc +++ /dev/null @@ -1 +0,0 @@ -Fix `synapse_replication_tcp_protocol_*_commands` metric label to be full command name, rather than just the first character diff --git a/changelog.d/4118.removal b/changelog.d/4118.removal deleted file mode 100644 index 6fb1d67b47..0000000000 --- a/changelog.d/4118.removal +++ /dev/null @@ -1 +0,0 @@ -The obsolete and non-functional /pull federation endpoint has been removed. 
diff --git a/changelog.d/4119.removal b/changelog.d/4119.removal deleted file mode 100644 index 81383ece6b..0000000000 --- a/changelog.d/4119.removal +++ /dev/null @@ -1 +0,0 @@ -The deprecated v1 key exchange endpoints have been removed. diff --git a/changelog.d/4120.removal b/changelog.d/4120.removal deleted file mode 100644 index a7a567098f..0000000000 --- a/changelog.d/4120.removal +++ /dev/null @@ -1 +0,0 @@ -Synapse will no longer fetch keys using the fallback deprecated v1 key exchange method and will now always use v2. diff --git a/changelog.d/4121.misc b/changelog.d/4121.misc deleted file mode 100644 index 9c29d80c3f..0000000000 --- a/changelog.d/4121.misc +++ /dev/null @@ -1 +0,0 @@ -Log some bits about room creation diff --git a/changelog.d/4122.bugfix b/changelog.d/4122.bugfix deleted file mode 100644 index 66dcfb18b9..0000000000 --- a/changelog.d/4122.bugfix +++ /dev/null @@ -1 +0,0 @@ -Searches that request profile info now no longer fail with a 500. diff --git a/changelog.d/4123.bugfix b/changelog.d/4123.bugfix deleted file mode 100644 index b82bc2aad3..0000000000 --- a/changelog.d/4123.bugfix +++ /dev/null @@ -1 +0,0 @@ -fix return code of empty key backups diff --git a/changelog.d/4124.misc b/changelog.d/4124.misc deleted file mode 100644 index 28f438b9b2..0000000000 --- a/changelog.d/4124.misc +++ /dev/null @@ -1 +0,0 @@ -Fix `tox` failure on old systems diff --git a/changelog.d/4127.bugfix b/changelog.d/4127.bugfix deleted file mode 100644 index 0701d2ceaa..0000000000 --- a/changelog.d/4127.bugfix +++ /dev/null @@ -1 +0,0 @@ -If the typing stream ID goes backwards (as on a worker when the master restarts), the worker's typing handler will no longer erroneously report rooms containing new typing events. 
diff --git a/changelog.d/4128.misc b/changelog.d/4128.misc deleted file mode 100644 index 76ab4b085c..0000000000 --- a/changelog.d/4128.misc +++ /dev/null @@ -1 +0,0 @@ -Add STATE_V2_TEST room version diff --git a/changelog.d/4132.bugfix b/changelog.d/4132.bugfix deleted file mode 100644 index 2304a40f05..0000000000 --- a/changelog.d/4132.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix table lock of device_lists_remote_cache which could freeze the application \ No newline at end of file diff --git a/changelog.d/4133.feature b/changelog.d/4133.feature deleted file mode 100644 index 89975f4c6e..0000000000 --- a/changelog.d/4133.feature +++ /dev/null @@ -1 +0,0 @@ -Include flags to optionally add `m.login.terms` to the registration flow when consent tracking is enabled. diff --git a/changelog.d/4135.bugfix b/changelog.d/4135.bugfix deleted file mode 100644 index 6879b1c162..0000000000 --- a/changelog.d/4135.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix exception when using state res v2 algorithm diff --git a/changelog.d/4137.misc b/changelog.d/4137.misc deleted file mode 100644 index 4fe933e33c..0000000000 --- a/changelog.d/4137.misc +++ /dev/null @@ -1 +0,0 @@ -Clean up event accesses and tests diff --git a/changelog.d/4138.misc b/changelog.d/4138.misc deleted file mode 100644 index 300199f8e8..0000000000 --- a/changelog.d/4138.misc +++ /dev/null @@ -1 +0,0 @@ -The default logging config will now set an explicit log file encoding of UTF-8. diff --git a/changelog.d/4139.misc b/changelog.d/4139.misc deleted file mode 100644 index d63d9e7003..0000000000 --- a/changelog.d/4139.misc +++ /dev/null @@ -1 +0,0 @@ -Add helpers functions for getting prev and auth events of an event diff --git a/changelog.d/4140.bugfix b/changelog.d/4140.bugfix deleted file mode 100644 index c7e0ee229d..0000000000 --- a/changelog.d/4140.bugfix +++ /dev/null @@ -1 +0,0 @@ -Generating the user consent URI no longer fails on Python 3. 
diff --git a/changelog.d/4142.feature b/changelog.d/4142.feature deleted file mode 100644 index 89975f4c6e..0000000000 --- a/changelog.d/4142.feature +++ /dev/null @@ -1 +0,0 @@ -Include flags to optionally add `m.login.terms` to the registration flow when consent tracking is enabled. diff --git a/changelog.d/4149.misc b/changelog.d/4149.misc deleted file mode 100644 index 0b299f0c6e..0000000000 --- a/changelog.d/4149.misc +++ /dev/null @@ -1 +0,0 @@ -Add some tests for the HTTP pusher. diff --git a/changelog.d/4155.misc b/changelog.d/4155.misc deleted file mode 100644 index 4a7d5acb66..0000000000 --- a/changelog.d/4155.misc +++ /dev/null @@ -1 +0,0 @@ -add purge_history.sh and purge_remote_media.sh scripts to contrib/ diff --git a/changelog.d/4156.misc b/changelog.d/4156.misc deleted file mode 100644 index 20d404406c..0000000000 --- a/changelog.d/4156.misc +++ /dev/null @@ -1 +0,0 @@ -HTTP tests have been refactored to contain less boilerplate. diff --git a/changelog.d/4157.bugfix b/changelog.d/4157.bugfix deleted file mode 100644 index 265514c3af..0000000000 --- a/changelog.d/4157.bugfix +++ /dev/null @@ -1 +0,0 @@ -Loading URL previews from the DB cache on Postgres will no longer cause Unicode type errors when responding to the request, and URL previews will no longer fail if the remote server returns a Content-Type header with the chartype in quotes. \ No newline at end of file diff --git a/changelog.d/4161.bugfix b/changelog.d/4161.bugfix deleted file mode 100644 index 252a40376b..0000000000 --- a/changelog.d/4161.bugfix +++ /dev/null @@ -1 +0,0 @@ -The hash_password script now works on Python 3. diff --git a/changelog.d/4176.bugfix b/changelog.d/4176.bugfix new file mode 100644 index 0000000000..3846f8a27b --- /dev/null +++ b/changelog.d/4176.bugfix @@ -0,0 +1 @@ +The media repository now no longer fails to decode UTF-8 filenames when downloading remote media. 
diff --git a/changelog.d/4180.misc b/changelog.d/4180.misc new file mode 100644 index 0000000000..80194b3dc0 --- /dev/null +++ b/changelog.d/4180.misc @@ -0,0 +1 @@ +A coveragerc file, as well as the py36-coverage tox target, have been added. diff --git a/changelog.d/4182.misc b/changelog.d/4182.misc new file mode 100644 index 0000000000..62949a065a --- /dev/null +++ b/changelog.d/4182.misc @@ -0,0 +1 @@ +Add a GitHub pull request template and add multiple issue templates diff --git a/changelog.d/4183.bugfix b/changelog.d/4183.bugfix new file mode 100644 index 0000000000..3e9ba3826f --- /dev/null +++ b/changelog.d/4183.bugfix @@ -0,0 +1 @@ +URL previews now correctly decode non-UTF-8 text if the header contains a `") + + # Now we get the room ID so that we can check that we know the + # version of the room. + room_id = p.get("room_id") + if not room_id: + logger.info( + "Ignoring PDU as does not have a room_id. Event ID: %s", + possible_event_id, + ) + continue + + try: + # In future we will actually use the room version to parse the + # PDU into an event. 
+ yield self.store.get_room_version(room_id) + except NotFoundError: + logger.info("Ignoring PDU for unknown room_id: %s", room_id) + continue + event = event_from_pdu_json(p) - room_id = event.room_id pdus_by_room.setdefault(room_id, []).append(event) pdu_results = {} diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index a958c45271..c6e89db4bc 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -473,7 +473,7 @@ class AuthHandler(BaseHandler): "version": self.hs.config.user_consent_version, "en": { "name": self.hs.config.user_consent_policy_name, - "url": "%s/_matrix/consent?v=%s" % ( + "url": "%s_matrix/consent?v=%s" % ( self.hs.config.public_baseurl, self.hs.config.user_consent_version, ), diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 9ca5fd8724..a3bb864bb2 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -202,27 +202,22 @@ class FederationHandler(BaseHandler): self.room_queues[room_id].append((pdu, origin)) return - # If we're no longer in the room just ditch the event entirely. This - # is probably an old server that has come back and thinks we're still - # in the room (or we've been rejoined to the room by a state reset). + # If we're not in the room just ditch the event entirely. This is + # probably an old server that has come back and thinks we're still in + # the room (or we've been rejoined to the room by a state reset). # - # If we were never in the room then maybe our database got vaped and - # we should check if we *are* in fact in the room. If we are then we - # can magically rejoin the room. + # Note that if we were never in the room then we would have already + # dropped the event, since we wouldn't know the room version. 
is_in_room = yield self.auth.check_host_in_room( room_id, self.server_name ) if not is_in_room: - was_in_room = yield self.store.was_host_joined( - pdu.room_id, self.server_name, + logger.info( + "[%s %s] Ignoring PDU from %s as we're not in the room", + room_id, event_id, origin, ) - if was_in_room: - logger.info( - "[%s %s] Ignoring PDU from %s as we've left the room", - room_id, event_id, origin, - ) - defer.returnValue(None) + defer.returnValue(None) state = None auth_chain = [] @@ -557,86 +552,54 @@ class FederationHandler(BaseHandler): room_id, event_id, event, ) - # FIXME (erikj): Awful hack to make the case where we are not currently - # in the room work - # If state and auth_chain are None, then we don't need to do this check - # as we already know we have enough state in the DB to handle this - # event. - if state and auth_chain and not event.internal_metadata.is_outlier(): - is_in_room = yield self.auth.check_host_in_room( - room_id, - self.server_name - ) - else: - is_in_room = True + event_ids = set() + if state: + event_ids |= {e.event_id for e in state} + if auth_chain: + event_ids |= {e.event_id for e in auth_chain} + + seen_ids = yield self.store.have_seen_events(event_ids) + + if state and auth_chain is not None: + # If we have any state or auth_chain given to us by the replication + # layer, then we should handle them (if we haven't before.) 
+ + event_infos = [] + + for e in itertools.chain(auth_chain, state): + if e.event_id in seen_ids: + continue + e.internal_metadata.outlier = True + auth_ids = e.auth_event_ids() + auth = { + (e.type, e.state_key): e for e in auth_chain + if e.event_id in auth_ids or e.type == EventTypes.Create + } + event_infos.append({ + "event": e, + "auth_events": auth, + }) + seen_ids.add(e.event_id) - if not is_in_room: logger.info( - "[%s %s] Got event for room we're not in", - room_id, event_id, + "[%s %s] persisting newly-received auth/state events %s", + room_id, event_id, [e["event"].event_id for e in event_infos] ) + yield self._handle_new_events(origin, event_infos) - try: - yield self._persist_auth_tree( - origin, auth_chain, state, event - ) - except AuthError as e: - raise FederationError( - "ERROR", - e.code, - e.msg, - affected=event_id, - ) - - else: - event_ids = set() - if state: - event_ids |= {e.event_id for e in state} - if auth_chain: - event_ids |= {e.event_id for e in auth_chain} - - seen_ids = yield self.store.have_seen_events(event_ids) - - if state and auth_chain is not None: - # If we have any state or auth_chain given to us by the replication - # layer, then we should handle them (if we haven't before.) 
- - event_infos = [] - - for e in itertools.chain(auth_chain, state): - if e.event_id in seen_ids: - continue - e.internal_metadata.outlier = True - auth_ids = e.auth_event_ids() - auth = { - (e.type, e.state_key): e for e in auth_chain - if e.event_id in auth_ids or e.type == EventTypes.Create - } - event_infos.append({ - "event": e, - "auth_events": auth, - }) - seen_ids.add(e.event_id) - - logger.info( - "[%s %s] persisting newly-received auth/state events %s", - room_id, event_id, [e["event"].event_id for e in event_infos] - ) - yield self._handle_new_events(origin, event_infos) - - try: - context = yield self._handle_new_event( - origin, - event, - state=state, - ) - except AuthError as e: - raise FederationError( - "ERROR", - e.code, - e.msg, - affected=event.event_id, - ) + try: + context = yield self._handle_new_event( + origin, + event, + state=state, + ) + except AuthError as e: + raise FederationError( + "ERROR", + e.code, + e.msg, + affected=event.event_id, + ) room = yield self.store.get_room(room_id) diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index a1e4b88e6d..528125e737 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -121,16 +121,15 @@ def parse_string(request, name, default=None, required=False, Args: request: the twisted HTTP request. - name (bytes/unicode): the name of the query parameter. - default (bytes/unicode|None): value to use if the parameter is absent, + name (bytes|unicode): the name of the query parameter. + default (bytes|unicode|None): value to use if the parameter is absent, defaults to None. Must be bytes if encoding is None. required (bool): whether to raise a 400 SynapseError if the parameter is absent, defaults to False. - allowed_values (list[bytes/unicode]): List of allowed values for the + allowed_values (list[bytes|unicode]): List of allowed values for the string, or None if any value is allowed, defaults to None. Must be the same type as name, if given. 
- encoding: The encoding to decode the name to, and decode the string - content with. + encoding (str|None): The encoding to decode the string content with. Returns: bytes/unicode|None: A string value or the default. Unicode if encoding diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index ebcb93bfc7..1eb5be0957 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -36,6 +36,7 @@ from synapse.push.presentable_names import ( ) from synapse.types import UserID from synapse.util.async_helpers import concurrently_execute +from synapse.util.logcontext import make_deferred_yieldable from synapse.visibility import filter_events_for_client logger = logging.getLogger(__name__) @@ -192,7 +193,7 @@ class Mailer(object): logger.info("Sending email push notification to %s" % email_address) - yield self.sendmail( + yield make_deferred_yieldable(self.sendmail( self.hs.config.email_smtp_host, raw_from, raw_to, multipart_msg.as_string().encode('utf8'), reactor=self.hs.get_reactor(), @@ -201,7 +202,7 @@ class Mailer(object): username=self.hs.config.email_smtp_user, password=self.hs.config.email_smtp_pass, requireTransportSecurity=self.hs.config.require_transport_security - ) + )) @defer.inlineCallbacks def get_room_vars(self, room_id, user_id, notifs, notif_events, room_state_ids): diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index 0010699d31..f6b4a85e40 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -27,7 +27,7 @@ from twisted.web.client import PartialDownloadError from synapse.api.errors import Codes, LoginError, SynapseError from synapse.http.server import finish_request -from synapse.http.servlet import parse_json_object_from_request +from synapse.http.servlet import RestServlet, parse_json_object_from_request from synapse.types import UserID from synapse.util.msisdn import phone_number_to_msisdn @@ -83,6 +83,7 @@ class LoginRestServlet(ClientV1RestServlet): PATTERNS = 
client_path_patterns("/login$") SAML2_TYPE = "m.login.saml2" CAS_TYPE = "m.login.cas" + SSO_TYPE = "m.login.sso" TOKEN_TYPE = "m.login.token" JWT_TYPE = "m.login.jwt" @@ -105,6 +106,10 @@ class LoginRestServlet(ClientV1RestServlet): if self.saml2_enabled: flows.append({"type": LoginRestServlet.SAML2_TYPE}) if self.cas_enabled: + flows.append({"type": LoginRestServlet.SSO_TYPE}) + + # we advertise CAS for backwards compat, though MSC1721 renamed it + # to SSO. flows.append({"type": LoginRestServlet.CAS_TYPE}) # While its valid for us to advertise this login type generally, @@ -384,11 +389,11 @@ class SAML2RestServlet(ClientV1RestServlet): defer.returnValue((200, {"status": "not_authenticated"})) -class CasRedirectServlet(ClientV1RestServlet): - PATTERNS = client_path_patterns("/login/cas/redirect", releases=()) +class CasRedirectServlet(RestServlet): + PATTERNS = client_path_patterns("/login/(cas|sso)/redirect") def __init__(self, hs): - super(CasRedirectServlet, self).__init__(hs) + super(CasRedirectServlet, self).__init__() self.cas_server_url = hs.config.cas_server_url.encode('ascii') self.cas_service_url = hs.config.cas_service_url.encode('ascii') diff --git a/synapse/rest/client/v2_alpha/auth.py b/synapse/rest/client/v2_alpha/auth.py index a8d8ed6590..fa73bdf3a1 100644 --- a/synapse/rest/client/v2_alpha/auth.py +++ b/synapse/rest/client/v2_alpha/auth.py @@ -21,7 +21,7 @@ from synapse.api.constants import LoginType from synapse.api.errors import SynapseError from synapse.api.urls import CLIENT_V2_ALPHA_PREFIX from synapse.http.server import finish_request -from synapse.http.servlet import RestServlet +from synapse.http.servlet import RestServlet, parse_string from ._base import client_v2_patterns @@ -131,16 +131,12 @@ class AuthRestServlet(RestServlet): self.auth_handler = hs.get_auth_handler() self.registration_handler = hs.get_handlers().registration_handler - @defer.inlineCallbacks def on_GET(self, request, stagetype): - yield + session = 
parse_string(request, "session") + if not session: + raise SynapseError(400, "No session supplied") + if stagetype == LoginType.RECAPTCHA: - if ('session' not in request.args or - len(request.args['session']) == 0): - raise SynapseError(400, "No session supplied") - - session = request.args["session"][0] - html = RECAPTCHA_TEMPLATE % { 'session': session, 'myurl': "%s/auth/%s/fallback/web" % ( @@ -155,13 +151,11 @@ class AuthRestServlet(RestServlet): request.write(html_bytes) finish_request(request) - defer.returnValue(None) + return None elif stagetype == LoginType.TERMS: - session = request.args['session'][0] - html = TERMS_TEMPLATE % { 'session': session, - 'terms_url': "%s/_matrix/consent?v=%s" % ( + 'terms_url': "%s_matrix/consent?v=%s" % ( self.hs.config.public_baseurl, self.hs.config.user_consent_version, ), @@ -176,25 +170,25 @@ class AuthRestServlet(RestServlet): request.write(html_bytes) finish_request(request) - defer.returnValue(None) + return None else: raise SynapseError(404, "Unknown auth stage type") @defer.inlineCallbacks def on_POST(self, request, stagetype): - yield - if stagetype == LoginType.RECAPTCHA: - if ('g-recaptcha-response' not in request.args or - len(request.args['g-recaptcha-response'])) == 0: - raise SynapseError(400, "No captcha response supplied") - if ('session' not in request.args or - len(request.args['session'])) == 0: - raise SynapseError(400, "No session supplied") - session = request.args['session'][0] + session = parse_string(request, "session") + if not session: + raise SynapseError(400, "No session supplied") + + if stagetype == LoginType.RECAPTCHA: + response = parse_string(request, "g-recaptcha-response") + + if not response: + raise SynapseError(400, "No captcha response supplied") authdict = { - 'response': request.args['g-recaptcha-response'][0], + 'response': response, 'session': session, } @@ -242,7 +236,7 @@ class AuthRestServlet(RestServlet): else: html = TERMS_TEMPLATE % { 'session': session, - 'terms_url': 
"%s/_matrix/consent?v=%s" % ( + 'terms_url': "%s_matrix/consent?v=%s" % ( self.hs.config.public_baseurl, self.hs.config.user_consent_version, ), diff --git a/synapse/rest/consent/consent_resource.py b/synapse/rest/consent/consent_resource.py index e0f7de5d5c..ad525b22e1 100644 --- a/synapse/rest/consent/consent_resource.py +++ b/synapse/rest/consent/consent_resource.py @@ -142,10 +142,10 @@ class ConsentResource(Resource): userhmac = None has_consented = False public_version = username == "" - if not public_version or not self.hs.config.user_consent_at_registration: - userhmac = parse_string(request, "h", required=True, encoding=None) + if not public_version: + userhmac_bytes = parse_string(request, "h", required=True, encoding=None) - self._check_hash(username, userhmac) + self._check_hash(username, userhmac_bytes) if username.startswith('@'): qualified_user_id = username @@ -155,13 +155,18 @@ class ConsentResource(Resource): u = yield self.store.get_user_by_id(qualified_user_id) if u is None: raise NotFoundError("Unknown user") + has_consented = u["consent_version"] == version + userhmac = userhmac_bytes.decode("ascii") try: self._render_template( request, "%s.html" % (version,), - user=username, userhmac=userhmac, version=version, - has_consented=has_consented, public_version=public_version, + user=username, + userhmac=userhmac, + version=version, + has_consented=has_consented, + public_version=public_version, ) except TemplateNotFound: raise NotFoundError("Unknown policy version") diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 76e479afa3..efe42a429d 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -16,6 +16,7 @@ import logging import os +from six import PY3 from six.moves import urllib from twisted.internet import defer @@ -48,26 +49,21 @@ def parse_media_id(request): return server_name, media_id, file_name except Exception: raise SynapseError( - 404, - "Invalid media id token %r" % 
(request.postpath,), - Codes.UNKNOWN, + 404, "Invalid media id token %r" % (request.postpath,), Codes.UNKNOWN ) def respond_404(request): respond_with_json( - request, 404, - cs_error( - "Not found %r" % (request.postpath,), - code=Codes.NOT_FOUND, - ), - send_cors=True + request, + 404, + cs_error("Not found %r" % (request.postpath,), code=Codes.NOT_FOUND), + send_cors=True, ) @defer.inlineCallbacks -def respond_with_file(request, media_type, file_path, - file_size=None, upload_name=None): +def respond_with_file(request, media_type, file_path, file_size=None, upload_name=None): logger.debug("Responding with %r", file_path) if os.path.isfile(file_path): @@ -97,31 +93,26 @@ def add_file_headers(request, media_type, file_size, upload_name): file_size (int): Size in bytes of the media, if known. upload_name (str): The name of the requested file, if any. """ + def _quote(x): return urllib.parse.quote(x.encode("utf-8")) request.setHeader(b"Content-Type", media_type.encode("UTF-8")) if upload_name: if is_ascii(upload_name): - disposition = ("inline; filename=%s" % (_quote(upload_name),)).encode("ascii") + disposition = "inline; filename=%s" % (_quote(upload_name),) else: - disposition = ( - "inline; filename*=utf-8''%s" % (_quote(upload_name),)).encode("ascii") + disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),) - request.setHeader(b"Content-Disposition", disposition) + request.setHeader(b"Content-Disposition", disposition.encode('ascii')) # cache for at least a day. # XXX: we might want to turn this off for data we don't want to # recommend caching as it's sensitive or private - or at least # select private. 
don't bother setting Expires as all our # clients are smart enough to be happy with Cache-Control - request.setHeader( - b"Cache-Control", b"public,max-age=86400,s-maxage=86400" - ) - - request.setHeader( - b"Content-Length", b"%d" % (file_size,) - ) + request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400") + request.setHeader(b"Content-Length", b"%d" % (file_size,)) @defer.inlineCallbacks @@ -153,6 +144,7 @@ class Responder(object): Responder is a context manager which *must* be used, so that any resources held can be cleaned up. """ + def write_to_consumer(self, consumer): """Stream response into consumer @@ -186,9 +178,18 @@ class FileInfo(object): thumbnail_method (str) thumbnail_type (str): Content type of thumbnail, e.g. image/png """ - def __init__(self, server_name, file_id, url_cache=False, - thumbnail=False, thumbnail_width=None, thumbnail_height=None, - thumbnail_method=None, thumbnail_type=None): + + def __init__( + self, + server_name, + file_id, + url_cache=False, + thumbnail=False, + thumbnail_width=None, + thumbnail_height=None, + thumbnail_method=None, + thumbnail_type=None, + ): self.server_name = server_name self.file_id = file_id self.url_cache = url_cache @@ -197,3 +198,74 @@ class FileInfo(object): self.thumbnail_height = thumbnail_height self.thumbnail_method = thumbnail_method self.thumbnail_type = thumbnail_type + + +def get_filename_from_headers(headers): + """ + Get the filename of the downloaded file by inspecting the + Content-Disposition HTTP header. + + Args: + headers (twisted.web.http_headers.Headers): The HTTP + request headers. + + Returns: + A Unicode string of the filename, or None. + """ + content_disposition = headers.get(b"Content-Disposition", [b'']) + + # No header, bail out. + if not content_disposition[0]: + return + + # dict of unicode: bytes, corresponding to the key value sections of the + # Content-Disposition header. 
+ params = {} + parts = content_disposition[0].split(b";") + for i in parts: + # Split into key-value pairs, if able + # We don't care about things like `inline`, so throw it out + if b"=" not in i: + continue + + key, value = i.strip().split(b"=") + params[key.decode('ascii')] = value + + upload_name = None + + # First check if there is a valid UTF-8 filename + upload_name_utf8 = params.get("filename*", None) + if upload_name_utf8: + if upload_name_utf8.lower().startswith(b"utf-8''"): + upload_name_utf8 = upload_name_utf8[7:] + # We have a filename*= section. This MUST be ASCII, and any UTF-8 + # bytes are %-quoted. + if PY3: + try: + # Once it is decoded, we can then unquote the %-encoded + # parts strictly into a unicode string. + upload_name = urllib.parse.unquote( + upload_name_utf8.decode('ascii'), errors="strict" + ) + except UnicodeDecodeError: + # Incorrect UTF-8. + pass + else: + # On Python 2, we first unquote the %-encoded parts and then + # decode it strictly using UTF-8. + try: + upload_name = urllib.parse.unquote(upload_name_utf8).decode('utf8') + except UnicodeDecodeError: + pass + + # If there isn't check for an ascii name. + if not upload_name: + upload_name_ascii = params.get("filename", None) + if upload_name_ascii and is_ascii(upload_name_ascii): + # Make sure there's no %-quoted bytes. If there is, reject it as + # non-valid ASCII. + if b"%" not in upload_name_ascii: + upload_name = upload_name_ascii.decode('ascii') + + # This may be None here, indicating we did not find a matching name. + return upload_name diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index d6c5f07af0..e117836e9a 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -14,14 +14,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import cgi import errno import logging import os import shutil -from six import PY3, iteritems -from six.moves.urllib import parse as urlparse +from six import iteritems import twisted.internet.error import twisted.web.http @@ -34,14 +32,18 @@ from synapse.api.errors import ( NotFoundError, SynapseError, ) -from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util import logcontext from synapse.util.async_helpers import Linearizer from synapse.util.retryutils import NotRetryingDestination -from synapse.util.stringutils import is_ascii, random_string +from synapse.util.stringutils import random_string -from ._base import FileInfo, respond_404, respond_with_responder +from ._base import ( + FileInfo, + get_filename_from_headers, + respond_404, + respond_with_responder, +) from .config_resource import MediaConfigResource from .download_resource import DownloadResource from .filepath import MediaFilePaths @@ -62,7 +64,7 @@ class MediaRepository(object): def __init__(self, hs): self.hs = hs self.auth = hs.get_auth() - self.client = MatrixFederationHttpClient(hs) + self.client = hs.get_http_client() self.clock = hs.get_clock() self.server_name = hs.hostname self.store = hs.get_datastore() @@ -397,39 +399,9 @@ class MediaRepository(object): yield finish() media_type = headers[b"Content-Type"][0].decode('ascii') - + upload_name = get_filename_from_headers(headers) time_now_ms = self.clock.time_msec() - content_disposition = headers.get(b"Content-Disposition", None) - if content_disposition: - _, params = cgi.parse_header(content_disposition[0].decode('ascii'),) - upload_name = None - - # First check if there is a valid UTF-8 filename - upload_name_utf8 = params.get("filename*", None) - if upload_name_utf8: - if upload_name_utf8.lower().startswith("utf-8''"): - upload_name = upload_name_utf8[7:] - - # If there isn't check for an ascii name. 
- if not upload_name: - upload_name_ascii = params.get("filename", None) - if upload_name_ascii and is_ascii(upload_name_ascii): - upload_name = upload_name_ascii - - if upload_name: - if PY3: - upload_name = urlparse.unquote(upload_name) - else: - upload_name = urlparse.unquote(upload_name.encode('ascii')) - try: - if isinstance(upload_name, bytes): - upload_name = upload_name.decode("utf-8") - except UnicodeDecodeError: - upload_name = None - else: - upload_name = None - logger.info("Stored remote media in file %r", fname) yield self.store.store_cached_remote_media( diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 91d1dafe64..d0ecf241b6 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cgi import datetime import errno import fnmatch @@ -44,15 +43,19 @@ from synapse.http.server import ( ) from synapse.http.servlet import parse_integer, parse_string from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.rest.media.v1._base import get_filename_from_headers from synapse.util.async_helpers import ObservableDeferred from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.logcontext import make_deferred_yieldable, run_in_background -from synapse.util.stringutils import is_ascii, random_string +from synapse.util.stringutils import random_string from ._base import FileInfo logger = logging.getLogger(__name__) +_charset_match = re.compile(br"<\s*meta[^>]*charset\s*=\s*([a-z0-9-]+)", flags=re.I) +_content_type_match = re.compile(r'.*; *charset="?(.*?)"?(;|$)', flags=re.I) + class PreviewUrlResource(Resource): isLeaf = True @@ -223,15 +226,25 @@ class PreviewUrlResource(Resource): with open(media_info['filename'], 'rb') as file: body = file.read() - # clobber the 
encoding from the content-type, or default to utf-8 - # XXX: this overrides any or XML charset headers in the body - # which may pose problems, but so far seems to work okay. - match = re.match( - r'.*; *charset="?(.*?)"?(;|$)', - media_info['media_type'], - re.I - ) - encoding = match.group(1) if match else "utf-8" + encoding = None + + # Let's try and figure out if it has an encoding set in a meta tag. + # Limit it to the first 1kb, since it ought to be in the meta tags + # at the top. + match = _charset_match.search(body[:1000]) + + # If we find a match, it should take precedence over the + # Content-Type header, so set it here. + if match: + encoding = match.group(1).decode('ascii') + + # If we don't find a match, we'll look at the HTTP Content-Type, and + # if that doesn't exist, we'll fall back to UTF-8. + if not encoding: + match = _content_type_match.match( + media_info['media_type'] + ) + encoding = match.group(1) if match else "utf-8" og = decode_and_calc_og(body, media_info['uri'], encoding) @@ -323,31 +336,7 @@ class PreviewUrlResource(Resource): media_type = "application/octet-stream" time_now_ms = self.clock.time_msec() - content_disposition = headers.get(b"Content-Disposition", None) - if content_disposition: - _, params = cgi.parse_header(content_disposition[0],) - download_name = None - - # First check if there is a valid UTF-8 filename - download_name_utf8 = params.get("filename*", None) - if download_name_utf8: - if download_name_utf8.lower().startswith("utf-8''"): - download_name = download_name_utf8[7:] - - # If there isn't check for an ascii name. 
- if not download_name: - download_name_ascii = params.get("filename", None) - if download_name_ascii and is_ascii(download_name_ascii): - download_name = download_name_ascii - - if download_name: - download_name = urlparse.unquote(download_name) - try: - download_name = download_name.decode("utf-8") - except UnicodeDecodeError: - download_name = None - else: - download_name = None + download_name = get_filename_from_headers(headers) yield self.store.store_local_media( media_id=file_id, diff --git a/synapse/static/client/login/index.html b/synapse/static/client/login/index.html index 96c8723cab..bcb6bc6bb7 100644 --- a/synapse/static/client/login/index.html +++ b/synapse/static/client/login/index.html @@ -12,35 +12,30 @@

Log in with one of the following methods

-
-
-