Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit a47636c

Browse files
Prevent local quarantined media from being claimed by media retention (#12972)
1 parent f7baffd commit a47636c

File tree

6 files changed

+185
-29
lines changed

6 files changed

+185
-29
lines changed

changelog.d/12972.feature

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add new `media_retention` options to the homeserver config for routinely cleaning up non-recently accessed media.

docs/usage/configuration/config_documentation.md

+6
Original file line numberDiff line numberDiff line change
@@ -1583,6 +1583,12 @@ been accessed, the media's creation time is used instead. Both thumbnails
15831583
and the original media will be removed. If either of these options are unset,
15841584
then media of that type will not be purged.
15851585

1586+
Local or cached remote media that has been
1587+
[quarantined](../../admin_api/media_admin_api.md#quarantining-media-in-a-room)
1588+
will not be deleted. Similarly, local media that has been marked as
1589+
[protected from quarantine](../../admin_api/media_admin_api.md#protecting-media-from-being-quarantined)
1590+
will not be deleted.
1591+
15861592
Example configuration:
15871593
```yaml
15881594
media_retention:

synapse/rest/admin/media.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ async def on_POST(
8383
requester = await self.auth.get_user_by_req(request)
8484
await assert_user_is_admin(self.auth, requester.user)
8585

86-
logging.info("Quarantining local media by user: %s", user_id)
86+
logging.info("Quarantining media by user: %s", user_id)
8787

8888
# Quarantine all media this user has uploaded
8989
num_quarantined = await self.store.quarantine_media_ids_by_user(
@@ -112,7 +112,7 @@ async def on_POST(
112112
requester = await self.auth.get_user_by_req(request)
113113
await assert_user_is_admin(self.auth, requester.user)
114114

115-
logging.info("Quarantining local media by ID: %s/%s", server_name, media_id)
115+
logging.info("Quarantining media by ID: %s/%s", server_name, media_id)
116116

117117
# Quarantine this media id
118118
await self.store.quarantine_media_by_id(
@@ -140,9 +140,7 @@ async def on_POST(
140140
) -> Tuple[int, JsonDict]:
141141
await assert_requester_is_admin(self.auth, request)
142142

143-
logging.info(
144-
"Remove from quarantine local media by ID: %s/%s", server_name, media_id
145-
)
143+
logging.info("Remove from quarantine media by ID: %s/%s", server_name, media_id)
146144

147145
# Remove from quarantine this media id
148146
await self.store.quarantine_media_by_id(server_name, media_id, None)

synapse/rest/media/v1/media_repository.py

+16-6
Original file line numberDiff line numberDiff line change
@@ -919,10 +919,14 @@ async def _apply_media_retention_rules(self) -> None:
919919
await self.delete_old_local_media(
920920
before_ts=local_media_threshold_timestamp_ms,
921921
keep_profiles=True,
922+
delete_quarantined_media=False,
923+
delete_protected_media=False,
922924
)
923925

924926
async def delete_old_remote_media(self, before_ts: int) -> Dict[str, int]:
925-
old_media = await self.store.get_remote_media_before(before_ts)
927+
old_media = await self.store.get_remote_media_ids(
928+
before_ts, include_quarantined_media=False
929+
)
926930

927931
deleted = 0
928932

@@ -975,25 +979,31 @@ async def delete_old_local_media(
975979
before_ts: int,
976980
size_gt: int = 0,
977981
keep_profiles: bool = True,
982+
delete_quarantined_media: bool = False,
983+
delete_protected_media: bool = False,
978984
) -> Tuple[List[str], int]:
979985
"""
980986
Delete local or remote media from this server by size and timestamp. Removes
981987
media files, any thumbnails and cached URLs.
982988
983989
Args:
984990
before_ts: Unix timestamp in ms.
985-
Files that were last used before this timestamp will be deleted
986-
size_gt: Size of the media in bytes. Files that are larger will be deleted
991+
Files that were last used before this timestamp will be deleted.
992+
size_gt: Size of the media in bytes. Files that are larger will be deleted.
987993
keep_profiles: Switch to delete also files that are still used in image data
988-
(e.g user profile, room avatar)
989-
If false these files will be deleted
994+
(e.g user profile, room avatar). If false these files will be deleted.
995+
delete_quarantined_media: If True, media marked as quarantined will be deleted.
996+
delete_protected_media: If True, media marked as protected will be deleted.
997+
990998
Returns:
991999
A tuple of (list of deleted media IDs, total deleted media IDs).
9921000
"""
993-
old_media = await self.store.get_local_media_before(
1001+
old_media = await self.store.get_local_media_ids(
9941002
before_ts,
9951003
size_gt,
9961004
keep_profiles,
1005+
include_quarantined_media=delete_quarantined_media,
1006+
include_protected_media=delete_protected_media,
9971007
)
9981008
return await self._remove_local_media_from_disk(old_media)
9991009

synapse/storage/databases/main/media_repository.py

+63-5
Original file line numberDiff line numberDiff line change
@@ -251,12 +251,36 @@ def get_local_media_by_user_paginate_txn(
251251
"get_local_media_by_user_paginate_txn", get_local_media_by_user_paginate_txn
252252
)
253253

254-
async def get_local_media_before(
254+
async def get_local_media_ids(
255255
self,
256256
before_ts: int,
257257
size_gt: int,
258258
keep_profiles: bool,
259+
include_quarantined_media: bool,
260+
include_protected_media: bool,
259261
) -> List[str]:
262+
"""
263+
Retrieve a list of media IDs from the local media store.
264+
265+
Args:
266+
before_ts: Only retrieve IDs from media that was either last accessed
267+
(or if never accessed, created) before the given UNIX timestamp in ms.
268+
size_gt: Only retrieve IDs from media that has a size (in bytes) greater than
269+
the given integer.
270+
keep_profiles: If True, exclude media IDs from the results that are used in the
271+
following situations:
272+
* global profile user avatar
273+
* per-room profile user avatar
274+
* room avatar
275+
* a user's avatar in the user directory
276+
include_quarantined_media: If False, exclude media IDs from the results that have
277+
been marked as quarantined.
278+
include_protected_media: If False, exclude media IDs from the results that have
279+
been marked as protected from quarantine.
280+
281+
Returns:
282+
A list of local media IDs.
283+
"""
260284

261285
# to find files that have never been accessed (last_access_ts IS NULL)
262286
# compare with `created_ts`
@@ -294,12 +318,24 @@ async def get_local_media_before(
294318
)
295319
sql += sql_keep
296320

297-
def _get_local_media_before_txn(txn: LoggingTransaction) -> List[str]:
321+
if include_quarantined_media is False:
322+
# Do not include media that has been quarantined
323+
sql += """
324+
AND quarantined_by IS NULL
325+
"""
326+
327+
if include_protected_media is False:
328+
# Do not include media that has been protected from quarantine
329+
sql += """
330+
AND safe_from_quarantine = false
331+
"""
332+
333+
def _get_local_media_ids_txn(txn: LoggingTransaction) -> List[str]:
298334
txn.execute(sql, (before_ts, before_ts, size_gt))
299335
return [row[0] for row in txn]
300336

301337
return await self.db_pool.runInteraction(
302-
"get_local_media_before", _get_local_media_before_txn
338+
"get_local_media_ids", _get_local_media_ids_txn
303339
)
304340

305341
async def store_local_media(
@@ -599,15 +635,37 @@ async def store_remote_media_thumbnail(
599635
desc="store_remote_media_thumbnail",
600636
)
601637

602-
async def get_remote_media_before(self, before_ts: int) -> List[Dict[str, str]]:
638+
async def get_remote_media_ids(
639+
self, before_ts: int, include_quarantined_media: bool
640+
) -> List[Dict[str, str]]:
641+
"""
642+
Retrieve a list of server name, media ID tuples from the remote media cache.
643+
644+
Args:
645+
before_ts: Only retrieve IDs from media that was either last accessed
646+
(or if never accessed, created) before the given UNIX timestamp in ms.
647+
include_quarantined_media: If False, exclude media IDs from the results that have
648+
been marked as quarantined.
649+
650+
Returns:
651+
A list of tuples containing:
652+
* The server name of homeserver where the media originates from,
653+
* The ID of the media.
654+
"""
603655
sql = (
604656
"SELECT media_origin, media_id, filesystem_id"
605657
" FROM remote_media_cache"
606658
" WHERE last_access_ts < ?"
607659
)
608660

661+
if include_quarantined_media is False:
662+
# Only include media that has not been quarantined
663+
sql += """
664+
AND quarantined_by IS NULL
665+
"""
666+
609667
return await self.db_pool.execute(
610-
"get_remote_media_before", self.db_pool.cursor_to_dict, sql, before_ts
668+
"get_remote_media_ids", self.db_pool.cursor_to_dict, sql, before_ts
611669
)
612670

613671
async def delete_remote_media(self, media_origin: str, media_id: str) -> None:

tests/rest/media/test_media_retention.py

+96-13
Original file line numberDiff line numberDiff line change
@@ -53,13 +53,16 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
5353
# Create a user to upload media with
5454
test_user_id = self.register_user("alice", "password")
5555

56-
# Inject media (3 images each; recently accessed, old access, never accessed)
57-
# into both the local store and the remote cache
56+
# Inject media (recently accessed, old access, never accessed, old access
57+
# quarantined media) into both the local store and the remote cache, plus
58+
# one additional local media that is marked as protected from quarantine.
5859
media_repository = hs.get_media_repository()
5960
test_media_content = b"example string"
6061

61-
def _create_media_and_set_last_accessed(
62+
def _create_media_and_set_attributes(
6263
last_accessed_ms: Optional[int],
64+
is_quarantined: Optional[bool] = False,
65+
is_protected: Optional[bool] = False,
6366
) -> str:
6467
# "Upload" some media to the local media store
6568
mxc_uri = self.get_success(
@@ -84,10 +87,31 @@ def _create_media_and_set_last_accessed(
8487
)
8588
)
8689

90+
if is_quarantined:
91+
# Mark this media as quarantined
92+
self.get_success(
93+
self.store.quarantine_media_by_id(
94+
server_name=self.hs.config.server.server_name,
95+
media_id=media_id,
96+
quarantined_by="@theadmin:test",
97+
)
98+
)
99+
100+
if is_protected:
101+
# Mark this media as protected from quarantine
102+
self.get_success(
103+
self.store.mark_local_media_as_safe(
104+
media_id=media_id,
105+
safe=True,
106+
)
107+
)
108+
87109
return media_id
88110

89-
def _cache_remote_media_and_set_last_accessed(
90-
media_id: str, last_accessed_ms: Optional[int]
111+
def _cache_remote_media_and_set_attributes(
112+
media_id: str,
113+
last_accessed_ms: Optional[int],
114+
is_quarantined: Optional[bool] = False,
91115
) -> str:
92116
# Pretend to cache some remote media
93117
self.get_success(
@@ -112,23 +136,58 @@ def _cache_remote_media_and_set_last_accessed(
112136
)
113137
)
114138

139+
if is_quarantined:
140+
# Mark this media as quarantined
141+
self.get_success(
142+
self.store.quarantine_media_by_id(
143+
server_name=self.remote_server_name,
144+
media_id=media_id,
145+
quarantined_by="@theadmin:test",
146+
)
147+
)
148+
115149
return media_id
116150

117151
# Start with the local media store
118-
self.local_recently_accessed_media = _create_media_and_set_last_accessed(
119-
self.THIRTY_DAYS_IN_MS
152+
self.local_recently_accessed_media = _create_media_and_set_attributes(
153+
last_accessed_ms=self.THIRTY_DAYS_IN_MS,
120154
)
121-
self.local_not_recently_accessed_media = _create_media_and_set_last_accessed(
122-
self.ONE_DAY_IN_MS
155+
self.local_not_recently_accessed_media = _create_media_and_set_attributes(
156+
last_accessed_ms=self.ONE_DAY_IN_MS,
157+
)
158+
self.local_not_recently_accessed_quarantined_media = (
159+
_create_media_and_set_attributes(
160+
last_accessed_ms=self.ONE_DAY_IN_MS,
161+
is_quarantined=True,
162+
)
163+
)
164+
self.local_not_recently_accessed_protected_media = (
165+
_create_media_and_set_attributes(
166+
last_accessed_ms=self.ONE_DAY_IN_MS,
167+
is_protected=True,
168+
)
169+
)
170+
self.local_never_accessed_media = _create_media_and_set_attributes(
171+
last_accessed_ms=None,
123172
)
124-
self.local_never_accessed_media = _create_media_and_set_last_accessed(None)
125173

126174
# And now the remote media store
127-
self.remote_recently_accessed_media = _cache_remote_media_and_set_last_accessed(
128-
"a", self.THIRTY_DAYS_IN_MS
175+
self.remote_recently_accessed_media = _cache_remote_media_and_set_attributes(
176+
media_id="a",
177+
last_accessed_ms=self.THIRTY_DAYS_IN_MS,
129178
)
130179
self.remote_not_recently_accessed_media = (
131-
_cache_remote_media_and_set_last_accessed("b", self.ONE_DAY_IN_MS)
180+
_cache_remote_media_and_set_attributes(
181+
media_id="b",
182+
last_accessed_ms=self.ONE_DAY_IN_MS,
183+
)
184+
)
185+
self.remote_not_recently_accessed_quarantined_media = (
186+
_cache_remote_media_and_set_attributes(
187+
media_id="c",
188+
last_accessed_ms=self.ONE_DAY_IN_MS,
189+
is_quarantined=True,
190+
)
132191
)
133192
# Remote media will always have a "last accessed" attribute, as it would not
134193
# be fetched from the remote homeserver unless instigated by a user.
@@ -163,8 +222,20 @@ def test_local_media_retention(self) -> None:
163222
],
164223
not_purged=[
165224
(self.hs.config.server.server_name, self.local_recently_accessed_media),
225+
(
226+
self.hs.config.server.server_name,
227+
self.local_not_recently_accessed_quarantined_media,
228+
),
229+
(
230+
self.hs.config.server.server_name,
231+
self.local_not_recently_accessed_protected_media,
232+
),
166233
(self.remote_server_name, self.remote_recently_accessed_media),
167234
(self.remote_server_name, self.remote_not_recently_accessed_media),
235+
(
236+
self.remote_server_name,
237+
self.remote_not_recently_accessed_quarantined_media,
238+
),
168239
],
169240
)
170241

@@ -199,6 +270,18 @@ def test_remote_media_cache_retention(self) -> None:
199270
self.hs.config.server.server_name,
200271
self.local_not_recently_accessed_media,
201272
),
273+
(
274+
self.hs.config.server.server_name,
275+
self.local_not_recently_accessed_quarantined_media,
276+
),
277+
(
278+
self.hs.config.server.server_name,
279+
self.local_not_recently_accessed_protected_media,
280+
),
281+
(
282+
self.remote_server_name,
283+
self.remote_not_recently_accessed_quarantined_media,
284+
),
202285
(self.hs.config.server.server_name, self.local_never_accessed_media),
203286
],
204287
)

0 commit comments

Comments
 (0)