Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 22cc93a

Browse files
Enable Faster Remote Room Joins against worker-mode Synapse. (#14752)
* Enable Complement tests for Faster Remote Room Joins on worker-mode * (dangerous) Add an override to allow Complement to use FRRJ under workers * Newsfile Signed-off-by: Olivier Wilkinson (reivilibre) <[email protected]> * Fix race where we didn't send out replication notification * MORE HACKS * Fix get_un_partial_stated_rooms_token to take instance_name * Fix bad merge * Remove warning * Correctly advance un_partial_stated_room_stream * Fix merge * Add another notify_replication * Fixups * Create a separate ReplicationNotifier * Fix test * Fix portdb * Create a separate ReplicationNotifier * Fix test * Fix portdb * Fix presence test * Newsfile * Apply suggestions from code review * Update changelog.d/14752.misc Co-authored-by: Erik Johnston <[email protected]> * lint Signed-off-by: Olivier Wilkinson (reivilibre) <[email protected]> Co-authored-by: Erik Johnston <[email protected]>
1 parent d329a56 commit 22cc93a

File tree

10 files changed

+35
-36
lines changed

10 files changed

+35
-36
lines changed

changelog.d/14752.misc

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Enable Complement tests for Faster Remote Room Joins against worker-mode Synapse.

docker/complement/conf/workers-shared-extra.yaml.j2

-2
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,8 @@ allow_device_name_lookup_over_federation: true
9494
experimental_features:
9595
# Enable history backfilling support
9696
msc2716_enabled: true
97-
{% if not workers_in_use %}
9897
# client-side support for partial state in /send_join responses
9998
faster_joins: true
100-
{% endif %}
10199
# Enable support for polls
102100
msc3381_polls_enabled: true
103101
# Enable deleting device-specific notification settings stored in account data

scripts-dev/complement.sh

+4-7
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ fi
190190

191191
extra_test_args=()
192192

193-
test_tags="synapse_blacklist,msc3787,msc3874,msc3890,msc3391,msc3930"
193+
test_tags="synapse_blacklist,msc3787,msc3874,msc3890,msc3391,msc3930,faster_joins"
194194

195195
# All environment variables starting with PASS_ will be shared.
196196
# (The prefix is stripped off before reaching the container.)
@@ -223,12 +223,9 @@ else
223223
export PASS_SYNAPSE_COMPLEMENT_DATABASE=sqlite
224224
fi
225225

226-
# We only test faster room joins on monoliths, because they are purposefully
227-
# being developed without worker support to start with.
228-
#
229-
# The tests for importing historical messages (MSC2716) also only pass with monoliths,
230-
# currently.
231-
test_tags="$test_tags,faster_joins,msc2716"
226+
# The tests for importing historical messages (MSC2716)
227+
# only pass with monoliths, currently.
228+
test_tags="$test_tags,msc2716"
232229
fi
233230

234231

synapse/app/generic_worker.py

-7
Original file line numberDiff line numberDiff line change
@@ -282,13 +282,6 @@ def start(config_options: List[str]) -> None:
282282
"synapse.app.user_dir",
283283
)
284284

285-
if config.experimental.faster_joins_enabled:
286-
raise ConfigError(
287-
"You have enabled the experimental `faster_joins` config option, but it is "
288-
"not compatible with worker deployments yet. Please disable `faster_joins` "
289-
"or run Synapse as a single process deployment instead."
290-
)
291-
292285
synapse.events.USE_FROZEN_DICTS = config.server.use_frozen_dicts
293286
synapse.util.caches.TRACK_MEMORY_USAGE = config.caches.track_memory_usage
294287

synapse/handlers/device.py

+2
Original file line numberDiff line numberDiff line change
@@ -974,6 +974,7 @@ def __init__(self, hs: "HomeServer", device_handler: DeviceHandler):
974974
self.federation = hs.get_federation_client()
975975
self.clock = hs.get_clock()
976976
self.device_handler = device_handler
977+
self._notifier = hs.get_notifier()
977978

978979
self._remote_edu_linearizer = Linearizer(name="remote_device_list")
979980

@@ -1054,6 +1055,7 @@ async def incoming_device_list_update(
10541055
user_id,
10551056
device_id,
10561057
)
1058+
self._notifier.notify_replication()
10571059

10581060
room_ids = await self.store.get_rooms_for_user(user_id)
10591061
if not room_ids:

synapse/handlers/federation.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -1870,14 +1870,15 @@ async def _sync_partial_state_room(
18701870
logger.info("Clearing partial-state flag for %s", room_id)
18711871
success = await self.store.clear_partial_state_room(room_id)
18721872

1873+
# Poke the notifier so that other workers see the write to
1874+
# the un-partial-stated rooms stream.
1875+
self._notifier.notify_replication()
1876+
18731877
if success:
18741878
logger.info("State resync complete for %s", room_id)
18751879
self._storage_controllers.state.notify_room_un_partial_stated(
18761880
room_id
18771881
)
1878-
# Poke the notifier so that other workers see the write to
1879-
# the un-partial-stated rooms stream.
1880-
self._notifier.notify_replication()
18811882

18821883
# TODO(faster_joins) update room stats and user directory?
18831884
# https://github.com/matrix-org/synapse/issues/12814

synapse/replication/tcp/streams/partial_state.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
import attr
1717

1818
from synapse.replication.tcp.streams import Stream
19-
from synapse.replication.tcp.streams._base import current_token_without_instance
2019

2120
if TYPE_CHECKING:
2221
from synapse.server import HomeServer
@@ -42,8 +41,7 @@ def __init__(self, hs: "HomeServer"):
4241
store = hs.get_datastores().main
4342
super().__init__(
4443
hs.get_instance_name(),
45-
# TODO(faster_joins, multiple writers): we need to account for instance names
46-
current_token_without_instance(store.get_un_partial_stated_rooms_token),
44+
store.get_un_partial_stated_rooms_token,
4745
store.get_un_partial_stated_rooms_from_stream,
4846
)
4947

@@ -70,7 +68,6 @@ def __init__(self, hs: "HomeServer"):
7068
store = hs.get_datastores().main
7169
super().__init__(
7270
hs.get_instance_name(),
73-
# TODO(faster_joins, multiple writers): we need to account for instance names
74-
current_token_without_instance(store.get_un_partial_stated_events_token),
71+
store.get_un_partial_stated_events_token,
7572
store.get_un_partial_stated_events_from_stream,
7673
)

synapse/storage/databases/main/events_worker.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -322,11 +322,12 @@ def get_chain_id_txn(txn: Cursor) -> int:
322322
"stream_id",
323323
)
324324

325-
def get_un_partial_stated_events_token(self) -> int:
326-
# TODO(faster_joins, multiple writers): This is inappropriate if there are multiple
327-
# writers because workers that don't write often will hold all
328-
# readers up.
329-
return self._un_partial_stated_events_stream_id_gen.get_current_token()
325+
def get_un_partial_stated_events_token(self, instance_name: str) -> int:
326+
return (
327+
self._un_partial_stated_events_stream_id_gen.get_current_token_for_writer(
328+
instance_name
329+
)
330+
)
330331

331332
async def get_un_partial_stated_events_from_stream(
332333
self, instance_name: str, last_id: int, current_id: int, limit: int
@@ -416,6 +417,8 @@ def process_replication_position(
416417
self._stream_id_gen.advance(instance_name, token)
417418
elif stream_name == BackfillStream.NAME:
418419
self._backfill_id_gen.advance(instance_name, -token)
420+
elif stream_name == UnPartialStatedEventStream.NAME:
421+
self._un_partial_stated_events_stream_id_gen.advance(instance_name, token)
419422
super().process_replication_position(stream_name, instance_name, token)
420423

421424
async def have_censored_event(self, event_id: str) -> bool:

synapse/storage/databases/main/room.py

+12-7
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
from synapse.api.room_versions import RoomVersion, RoomVersions
4444
from synapse.config.homeserver import HomeServerConfig
4545
from synapse.events import EventBase
46+
from synapse.replication.tcp.streams.partial_state import UnPartialStatedRoomStream
4647
from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
4748
from synapse.storage.database import (
4849
DatabasePool,
@@ -144,6 +145,13 @@ def __init__(
144145
"stream_id",
145146
)
146147

148+
def process_replication_position(
149+
self, stream_name: str, instance_name: str, token: int
150+
) -> None:
151+
if stream_name == UnPartialStatedRoomStream.NAME:
152+
self._un_partial_stated_rooms_stream_id_gen.advance(instance_name, token)
153+
return super().process_replication_position(stream_name, instance_name, token)
154+
147155
async def store_room(
148156
self,
149157
room_id: str,
@@ -1281,13 +1289,10 @@ async def get_join_event_id_and_device_lists_stream_id_for_partial_state(
12811289
)
12821290
return result["join_event_id"], result["device_lists_stream_id"]
12831291

1284-
def get_un_partial_stated_rooms_token(self) -> int:
1285-
# TODO(faster_joins, multiple writers): This is inappropriate if there
1286-
# are multiple writers because workers that don't write often will
1287-
# hold all readers up.
1288-
# (See `MultiWriterIdGenerator.get_persisted_upto_position` for an
1289-
# explanation.)
1290-
return self._un_partial_stated_rooms_stream_id_gen.get_current_token()
1292+
def get_un_partial_stated_rooms_token(self, instance_name: str) -> int:
1293+
return self._un_partial_stated_rooms_stream_id_gen.get_current_token_for_writer(
1294+
instance_name
1295+
)
12911296

12921297
async def get_un_partial_stated_rooms_from_stream(
12931298
self, instance_name: str, last_id: int, current_id: int, limit: int

synapse/storage/databases/main/state.py

+2
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ def process_replication_rows(
9595
for row in rows:
9696
assert isinstance(row, UnPartialStatedEventStreamRow)
9797
self._get_state_group_for_event.invalidate((row.event_id,))
98+
self.is_partial_state_event.invalidate((row.event_id,))
9899

99100
super().process_replication_rows(stream_name, instance_name, token, rows)
100101

@@ -485,6 +486,7 @@ def _update_state_for_partial_state_event_txn(
485486
"rejection_status_changed": rejection_status_changed,
486487
},
487488
)
489+
txn.call_after(self.hs.get_notifier().on_new_replication_data)
488490

489491

490492
class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):

0 commit comments

Comments
 (0)