Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit eba98fb

Browse files
authored
Add functions to MultiWriterIdGen used by events stream (#8164)
1 parent 5099bd6 commit eba98fb

File tree

4 files changed

+145
-3
lines changed

4 files changed

+145
-3
lines changed

changelog.d/8164.misc

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add functions to `MultiWriterIdGen` used by events stream.

synapse/storage/util/id_generators.py

+101-2
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,10 @@
1414
# limitations under the License.
1515

1616
import contextlib
17+
import heapq
1718
import threading
1819
from collections import deque
19-
from typing import Dict, Set
20+
from typing import Dict, List, Set
2021

2122
from typing_extensions import Deque
2223

@@ -210,6 +211,23 @@ def __init__(
210211
# should be less than the minimum of this set (if not empty).
211212
self._unfinished_ids = set() # type: Set[int]
212213

214+
# We track the max position where we know everything before has been
215+
# persisted. This is done by a) looking at the min across all instances
216+
# and b) noting that if we have seen a run of persisted positions
217+
# without gaps (e.g. 5, 6, 7) then we can skip forward (e.g. to 7).
218+
#
219+
# Note: There is no guarantee that the IDs generated by the sequence
220+
# will be gapless; gaps can form when e.g. a transaction was rolled
221+
# back. This means that sometimes we won't be able to skip forward the
222+
# position even though everything has been persisted. However, since
223+
# gaps should be relatively rare it's still worth doing the bookkeeping
224+
# that allows us to skip forwards when there are gapless runs of
225+
# positions.
226+
self._persisted_upto_position = (
227+
min(self._current_positions.values()) if self._current_positions else 0
228+
)
229+
self._known_persisted_positions = [] # type: List[int]
230+
213231
self._sequence_gen = PostgresSequenceGenerator(sequence_name)
214232

215233
def _load_current_ids(
@@ -234,9 +252,12 @@ def _load_current_ids(
234252

235253
return current_positions
236254

237-
def _load_next_id_txn(self, txn):
255+
def _load_next_id_txn(self, txn) -> int:
238256
return self._sequence_gen.get_next_id_txn(txn)
239257

258+
def _load_next_mult_id_txn(self, txn, n: int) -> List[int]:
259+
return self._sequence_gen.get_next_mult_txn(txn, n)
260+
240261
async def get_next(self):
241262
"""
242263
Usage:
@@ -262,6 +283,34 @@ def manager():
262283

263284
return manager()
264285

286+
async def get_next_mult(self, n: int):
287+
"""
288+
Usage:
289+
with await stream_id_gen.get_next_mult(5) as stream_ids:
290+
# ... persist events ...
291+
"""
292+
next_ids = await self._db.runInteraction(
293+
"_load_next_mult_id", self._load_next_mult_id_txn, n
294+
)
295+
296+
# Assert the fetched IDs are actually greater than any ID we've already
297+
# seen. If not, then the sequence and table have got out of sync
298+
# somehow.
299+
assert max(self.get_positions().values(), default=0) < min(next_ids)
300+
301+
with self._lock:
302+
self._unfinished_ids.update(next_ids)
303+
304+
@contextlib.contextmanager
305+
def manager():
306+
try:
307+
yield next_ids
308+
finally:
309+
for i in next_ids:
310+
self._mark_id_as_finished(i)
311+
312+
return manager()
313+
265314
def get_next_txn(self, txn: LoggingTransaction):
266315
"""
267316
Usage:
@@ -326,3 +375,53 @@ def advance(self, instance_name: str, new_id: int):
326375
self._current_positions[instance_name] = max(
327376
new_id, self._current_positions.get(instance_name, 0)
328377
)
378+
379+
self._add_persisted_position(new_id)
380+
381+
def get_persisted_upto_position(self) -> int:
382+
"""Get the max position where all previous positions have been
383+
persisted.
384+
385+
Note: In the worst case scenario this will be equal to the minimum
386+
position across writers. This means that the returned position here can
387+
lag if one writer doesn't write very often.
388+
"""
389+
390+
with self._lock:
391+
return self._persisted_upto_position
392+
393+
def _add_persisted_position(self, new_id: int):
394+
"""Record that we have persisted a position.
395+
396+
This is used to keep `_persisted_upto_position` up to date.
397+
"""
398+
399+
# We require that the lock is locked by caller
400+
assert self._lock.locked()
401+
402+
heapq.heappush(self._known_persisted_positions, new_id)
403+
404+
# We move the current min position up if the minimum current positions
405+
# of all instances is higher (since by definition all positions less
406+
# than that have been persisted).
407+
min_curr = min(self._current_positions.values())
408+
self._persisted_upto_position = max(min_curr, self._persisted_upto_position)
409+
410+
# We now iterate through the seen positions, discarding those that are
411+
# less than the current min positions, and incrementing the min position
412+
# if it's exactly one greater.
413+
#
414+
# This is also where we discard items from `_known_persisted_positions`
415+
# (to ensure the list doesn't infinitely grow).
416+
while self._known_persisted_positions:
417+
if self._known_persisted_positions[0] <= self._persisted_upto_position:
418+
heapq.heappop(self._known_persisted_positions)
419+
elif (
420+
self._known_persisted_positions[0] == self._persisted_upto_position + 1
421+
):
422+
heapq.heappop(self._known_persisted_positions)
423+
self._persisted_upto_position += 1
424+
else:
425+
# There was a gap in seen positions, so there is nothing more to
426+
# do.
427+
break

synapse/storage/util/sequence.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# limitations under the License.
1515
import abc
1616
import threading
17-
from typing import Callable, Optional
17+
from typing import Callable, List, Optional
1818

1919
from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
2020
from synapse.storage.types import Cursor
@@ -39,6 +39,12 @@ def get_next_id_txn(self, txn: Cursor) -> int:
3939
txn.execute("SELECT nextval(?)", (self._sequence_name,))
4040
return txn.fetchone()[0]
4141

42+
def get_next_mult_txn(self, txn: Cursor, n: int) -> List[int]:
43+
txn.execute(
44+
"SELECT nextval(?) FROM generate_series(1, ?)", (self._sequence_name, n)
45+
)
46+
return [i for (i,) in txn]
47+
4248

4349
GetFirstCallbackType = Callable[[Cursor], int]
4450

tests/storage/test_id_generators.py

+36
Original file line numberDiff line numberDiff line change
@@ -182,3 +182,39 @@ def _get_next_txn(txn):
182182

183183
self.assertEqual(id_gen.get_positions(), {"master": 8})
184184
self.assertEqual(id_gen.get_current_token_for_writer("master"), 8)
185+
186+
def test_get_persisted_upto_position(self):
187+
"""Test that `get_persisted_upto_position` correctly tracks updates to
188+
positions.
189+
"""
190+
191+
self._insert_rows("first", 3)
192+
self._insert_rows("second", 5)
193+
194+
id_gen = self._create_id_generator("first")
195+
196+
# Min is 3 and there is a gap between 3 and 5, so we expect it to be 3.
197+
self.assertEqual(id_gen.get_persisted_upto_position(), 3)
198+
199+
# We advance "first" straight to 6. Min is now 5 but there is no gap so
200+
# we expect it to be 6
201+
id_gen.advance("first", 6)
202+
self.assertEqual(id_gen.get_persisted_upto_position(), 6)
203+
204+
# No gap, so we expect 7.
205+
id_gen.advance("second", 7)
206+
self.assertEqual(id_gen.get_persisted_upto_position(), 7)
207+
208+
# We haven't seen 8 yet, so we expect 7 still.
209+
id_gen.advance("second", 9)
210+
self.assertEqual(id_gen.get_persisted_upto_position(), 7)
211+
212+
# Now that we've seen 7, 8 and 9 we can go straight to 9.
213+
id_gen.advance("first", 8)
214+
self.assertEqual(id_gen.get_persisted_upto_position(), 9)
215+
216+
# Jump forward with gaps. The minimum is 11; even though we haven't seen
217+
# 10 we know that everything before 11 must be persisted.
218+
id_gen.advance("first", 11)
219+
id_gen.advance("second", 15)
220+
self.assertEqual(id_gen.get_persisted_upto_position(), 11)

0 commit comments

Comments
 (0)