Ensure the URL previewer also hashes and quarantines media (#18297)

Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
This commit is contained in:
Will Hunt
2025-05-06 11:04:31 +01:00
committed by GitHub
parent c9adbc6a1c
commit d0873d549a
3 changed files with 15 additions and 4 deletions

1
changelog.d/18297.misc Normal file
View File

@@ -0,0 +1 @@
Apply file hashing and existing quarantines to media downloaded for URL previews.

View File

@@ -378,7 +378,6 @@ class MediaRepository:
media_length=content_length,
user_id=auth_user,
sha256=sha256,
# TODO: Better name?
quarantined_by="system" if should_quarantine else None,
)

View File

@@ -41,7 +41,7 @@ from synapse.api.errors import Codes, SynapseError
from synapse.http.client import SimpleHttpClient
from synapse.logging.context import make_deferred_yieldable, run_in_background
from synapse.media._base import FileInfo, get_filename_from_headers
from synapse.media.media_storage import MediaStorage
from synapse.media.media_storage import MediaStorage, SHA256TransparentIOWriter
from synapse.media.oembed import OEmbedProvider
from synapse.media.preview_html import decode_body, parse_html_to_open_graph
from synapse.metrics.background_process_metrics import run_as_background_process
@@ -593,17 +593,26 @@ class UrlPreviewer:
file_info = FileInfo(server_name=None, file_id=file_id, url_cache=True)
async with self.media_storage.store_into_file(file_info) as (f, fname):
sha256writer = SHA256TransparentIOWriter(f)
if url.startswith("data:"):
if not allow_data_urls:
raise SynapseError(
500, "Previewing of data: URLs is forbidden", Codes.UNKNOWN
)
download_result = await self._parse_data_url(url, f)
download_result = await self._parse_data_url(url, sha256writer.wrap())
else:
download_result = await self._download_url(url, f)
download_result = await self._download_url(url, sha256writer.wrap())
try:
sha256 = sha256writer.hexdigest()
should_quarantine = await self.store.get_is_hash_quarantined(sha256)
if should_quarantine:
logger.warn(
"Media has been automatically quarantined as it matched existing quarantined media"
)
time_now_ms = self.clock.time_msec()
await self.store.store_local_media(
@@ -614,6 +623,8 @@ class UrlPreviewer:
media_length=download_result.length,
user_id=user,
url_cache=url,
sha256=sha256,
quarantined_by="system" if should_quarantine else None,
)
except Exception as e: